Message ID | 1286261630-5085-1-git-send-email-bmt@zurich.ibm.com |
---|---|
State | Not Applicable, archived |
Delegated to: | David Miller |
Headers | show |
On 10/05/2010 01:53 AM, Bernard Metzler wrote: > --- > drivers/infiniband/hw/siw/iwarp.h | 324 +++++++++++++++++++++++++++++++++++++ > 1 files changed, 324 insertions(+), 0 deletions(-) > create mode 100644 drivers/infiniband/hw/siw/iwarp.h > > diff --git a/drivers/infiniband/hw/siw/iwarp.h b/drivers/infiniband/hw/siw/iwarp.h > new file mode 100644 > index 0000000..762c1d3 > --- /dev/null > +++ b/drivers/infiniband/hw/siw/iwarp.h > @@ -0,0 +1,324 @@ > +/* > + * Software iWARP device driver for Linux > + * > + * Authors: Bernard Metzler<bmt@zurich.ibm.com> > + * Fredy Neeser<nfd@zurich.ibm.com> > + * > + * Copyright (c) 2008-2010, IBM Corporation > + * > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * BSD license below: > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above copyright notice, > + * this list of conditions and the following disclaimer. > + * > + * - Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in the > + * documentation and/or other materials provided with the distribution. > + * > + * - Neither the name of IBM nor the names of its contributors may be > + * used to endorse or promote products derived from this software without > + * specific prior written permission. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > + */ > + > +#ifndef _IWARP_H > +#define _IWARP_H > + > +#include<rdma/rdma_user_cm.h> /* RDMA_MAX_PRIVATE_DATA */ > +#include<linux/types.h> > +#include<asm/byteorder.h> > + > + > +#define RDMAP_VERSION 1 > +#define DDP_VERSION 1 > +#define MPA_REVISION_1 1 > +#define MPA_MAX_PRIVDATA RDMA_MAX_PRIVATE_DATA > +#define MPA_KEY_REQ "MPA ID Req Frame" > +#define MPA_KEY_REP "MPA ID Rep Frame" > + > +struct mpa_rr_params { > +#if defined(__LITTLE_ENDIAN_BITFIELD) > + __u16 res:5, > + r:1, > + c:1, > + m:1, > + rev:8; > +#elif defined(__BIG_ENDIAN_BITFIELD) > + __u16 m:1, > + c:1, > + r:1, > + res:5, > + rev:8; > +#else > +#error "Adjust your<asm/byteorder.h> defines" > +#endif > + __u16 pd_len; > +}; > + > +/* > + * MPA request/reply header > + */ > +struct mpa_rr { > + __u8 key[16]; > + struct mpa_rr_params params; > +}; > + > +/* > + * Don't change the layout/size of this struct! > + */ > +struct mpa_marker { > + __u16 rsvd; > + __u16 fpdu_hmd; /* FPDU header-marker distance (= MPA's FPDUPTR) */ > +}; > + > +#define MPA_MARKER_SPACING 512 > +#define MPA_HDR_SIZE 2 > + > +/* > + * MPA marker size: > + * - Standards-compliant marker insertion: Use sizeof(struct mpa_marker) > + * - "Invisible markers" for testing sender's marker insertion > + * without affecting receiver: Use 0 > + */ > +#define MPA_MARKER_SIZE sizeof(struct mpa_marker) > + > + > +/* > + * maximum MPA trailer > + */ > +struct mpa_trailer { > + char pad[4]; > + __u32 crc; > +}; > + > +#define MPA_CRC_SIZE 4 > + > + > +/* > + * Common portion of iWARP headers (MPA, DDP, RDMAP) > + * for any FPDU > + */ > +struct iwarp_ctrl { > + __u16 mpa_len; > +#if defined(__LITTLE_ENDIAN_BITFIELD) > + __u16 dv:2, /* DDP Version */ > + rsvd:4, /* DDP reserved, MBZ */ > + l:1, /* DDP Last flag */ > + t:1, /* DDP Tagged flag */ > + opcode:4, /* RDMAP opcode */ > + rsv:2, /* RDMAP reserved, MBZ */ > + rv:2; /* RDMAP Version, 01 for IETF, 00 for RDMAC */ > +#elif defined(__BIG_ENDIAN_BITFIELD) > + __u16 t:1, /* DDP Tagged flag */ > + l:1, /* DDP Last flag */ > + rsvd:4, /* DDP reserved, MBZ */ > + dv:2, /* DDP Version */ > + rv:2, /* RDMAP Version, 01 for IETF, 00 for RDMAC */ > + rsv:2, /* RDMAP reserved, MBZ */ > + opcode:4; /* RDMAP opcode */ > +#else > +#error "Adjust your<asm/byteorder.h> defines" > +#endif > +}; > + > + > +struct rdmap_terminate_ctrl { > +#if defined(__LITTLE_ENDIAN_BITFIELD) > + __u32 etype:4, > + layer:4, > + ecode:8, > + rsvd1:5, > + r:1, > + d:1, > + m:1, > + rsvd2:8; > +#elif defined(__BIG_ENDIAN_BITFIELD) > + __u32 layer:4, > + etype:4, > + ecode:8, > + m:1, > + d:1, > + r:1, > + rsvd1:5, > + rsvd2:8; > +#else > +#error "Adjust your<asm/byteorder.h> defines" > +#endif > +}; > + > + > +struct iwarp_rdma_write { > + struct iwarp_ctrl ctrl; > + __u32 sink_stag; > + __u64 sink_to; > +} __attribute__((__packed__)); > + > +struct iwarp_rdma_rreq { > + struct iwarp_ctrl ctrl; > + __u32 rsvd; > + __u32 ddp_qn; > + __u32 ddp_msn; > + __u32 ddp_mo; > + __u32 sink_stag; > + __u64 sink_to; > + __u32 read_size; > + __u32 source_stag; > + __u64 source_to; > +} __attribute__((__packed__)); > + > +struct iwarp_rdma_rresp { > + struct iwarp_ctrl ctrl; > + __u32 sink_stag; > + __u64 sink_to; > +} __attribute__((__packed__)); > + > +struct iwarp_send { > + struct iwarp_ctrl ctrl; > + __u32 rsvd; > + __u32 ddp_qn; > + __u32 ddp_msn; > + __u32 ddp_mo; > +} __attribute__((__packed__)); > + > +struct iwarp_send_inv { > + struct iwarp_ctrl ctrl; > + __u32 inval_stag; > + __u32 ddp_qn; > + __u32 ddp_msn; > + __u32 ddp_mo; > +} __attribute__((__packed__)); > + > +struct iwarp_terminate { > + struct iwarp_ctrl ctrl; > + __u32 rsvd; > + __u32 ddp_qn; > + __u32 ddp_msn; > + __u32 ddp_mo; > + struct rdmap_terminate_ctrl term_ctrl; > +} __attribute__((__packed__)); > + > + > +/* > + * Common portion of iWARP headers (MPA, DDP, RDMAP) > + * for an FPDU carrying an untagged DDP segment > + */ > +struct iwarp_ctrl_untagged { > + struct iwarp_ctrl ctrl; > + __u32 rsvd; > + __u32 ddp_qn; > + __u32 ddp_msn; > + __u32 ddp_mo; > +} __attribute__((__packed__)); > + > +/* > + * Common portion of iWARP headers (MPA, DDP, RDMAP) > + * for an FPDU carrying a tagged DDP segment > + */ > +struct iwarp_ctrl_tagged { > + struct iwarp_ctrl ctrl; > + __u32 ddp_stag; > + __u64 ddp_to; > +} __attribute__((__packed__)); > + > All of the above header structures should use __beXX types since the fields are all in Network Byte Order. Also, did you run sparse on the patches (Documentation/sparse.txt)? > +union iwarp_hdrs { > + struct iwarp_ctrl ctrl; > + struct iwarp_ctrl_untagged c_untagged; > + struct iwarp_ctrl_tagged c_tagged; > + struct iwarp_rdma_write rwrite; > + struct iwarp_rdma_rreq rreq; > + struct iwarp_rdma_rresp rresp; > + struct iwarp_terminate terminate; > + struct iwarp_send send; > + struct iwarp_send_inv send_inv; > +}; > + > + > +#define MPA_MIN_FRAG ((sizeof(union iwarp_hdrs) + MPA_CRC_SIZE)) > + > +enum ddp_etype { > + DDP_ETYPE_CATASTROPHIC = 0x0, > + DDP_ETYPE_TAGGED_BUF = 0x1, > + DDP_ETYPE_UNTAGGED_BUF = 0x2, > + DDP_ETYPE_RSVD = 0x3 > +}; > + > +enum ddp_ecode { > + DDP_ECODE_CATASTROPHIC = 0x00, > + /* Tagged Buffer Errors */ > + DDP_ECODE_T_INVALID_STAG = 0x00, > + DDP_ECODE_T_BASE_BOUNDS = 0x01, > + DDP_ECODE_T_STAG_NOT_ASSOC = 0x02, > + DDP_ECODE_T_TO_WRAP = 0x03, > + DDP_ECODE_T_DDP_VERSION = 0x04, > + /* Untagged Buffer Errors */ > + DDP_ECODE_UT_INVALID_QN = 0x01, > + DDP_ECODE_UT_INVALID_MSN_NOBUF = 0x02, > + DDP_ECODE_UT_INVALID_MSN_RANGE = 0x03, > + DDP_ECODE_UT_INVALID_MO = 0x04, > + DDP_ECODE_UT_MSG_TOOLONG = 0x05, > + DDP_ECODE_UT_DDP_VERSION = 0x06 > +}; > + > + > +enum rdmap_untagged_qn { > + RDMAP_UNTAGGED_QN_SEND = 0, > + RDMAP_UNTAGGED_QN_RDMA_READ = 1, > + RDMAP_UNTAGGED_QN_TERMINATE = 2, > + RDMAP_UNTAGGED_QN_COUNT = 3 > +}; > + > +enum rdmap_etype { > + RDMAP_ETYPE_CATASTROPHIC = 0x0, > + RDMAP_ETYPE_REMOTE_PROTECTION = 0x1, > + RDMAP_ETYPE_REMOTE_OPERATION = 0x2 > +}; > + > +enum rdmap_ecode { > + RDMAP_ECODE_INVALID_STAG = 0x00, > + RDMAP_ECODE_BASE_BOUNDS = 0x01, > + RDMAP_ECODE_ACCESS_RIGHTS = 0x02, > + RDMAP_ECODE_STAG_NOT_ASSOC = 0x03, > + RDMAP_ECODE_TO_WRAP = 0x04, > + RDMAP_ECODE_RDMAP_VERSION = 0x05, > + RDMAP_ECODE_UNEXPECTED_OPCODE = 0x06, > + RDMAP_ECODE_CATASTROPHIC_STREAM = 0x07, > + RDMAP_ECODE_CATASTROPHIC_GLOBAL = 0x08, > + RDMAP_ECODE_STAG_NOT_INVALIDATE = 0x09, > + RDMAP_ECODE_UNSPECIFIED = 0xff > +}; > + > +enum rdmap_elayer { > + RDMAP_ERROR_LAYER_RDMA = 0x00, > + RDMAP_ERROR_LAYER_DDP = 0x01, > + RDMAP_ERROR_LAYER_LLP = 0x02 /* eg., MPA */ > +}; > + > +enum rdma_opcode { > + RDMAP_RDMA_WRITE = 0x0, > + RDMAP_RDMA_READ_REQ = 0x1, > + RDMAP_RDMA_READ_RESP = 0x2, > + RDMAP_SEND = 0x3, > + RDMAP_SEND_INVAL = 0x4, > + RDMAP_SEND_SE = 0x5, > + RDMAP_SEND_SE_INVAL = 0x6, > + RDMAP_TERMINATE = 0x7, > + RDMAP_NOT_SUPPORTED = RDMAP_TERMINATE + 1 > +}; > + > +#endif > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
<snip> > > + */ > > +struct iwarp_ctrl_tagged { > > + struct iwarp_ctrl ctrl; > > + __u32 ddp_stag; > > + __u64 ddp_to; > > +} __attribute__((__packed__)); > > + > > > > All of the above header structures should use __beXX types since the > fields are all in Network Byte Order. > right. will get fixed. > Also, did you run sparse on the patches (Documentation/sparse.txt)? > > frankly, i was not aware. sorry. will do. bernard. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Oct 5, 2010 at 8:53 AM, Bernard Metzler <bmt@zurich.ibm.com> wrote:
> +} __attribute__((__packed__));
Apparently you make extensive use of the packed attribute. Please read
this blog entry, in which it is explained why this is harmful:
http://digitalvampire.org/blog/index.php/2006/07/31/why-you-shouldnt-use-__attribute__packed/
Bart.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
linux-rdma-owner@vger.kernel.org wrote on 10/05/2010 06:10:45 PM: > On Tue, Oct 5, 2010 at 8:53 AM, Bernard Metzler <bmt@zurich.ibm.com> wrote: > > +} __attribute__((__packed__)); > > Apparently you make extensive use of the packed attribute. Please read > this blog entry, in which it is explained why this is harmful: > > http://digitalvampire.org/blog/index.php/2006/07/31/why-you- > shouldnt-use-__attribute__packed/ > right, thanks, i was not aware of that big overhead. but...there are reasons why we may need it _packed_: (1) to get the size of the packet hdr (2) to read/write the hdr content (1) is not harmful. (2) could be limited to just one read/write operation per rx/tx path and all other access could be done on a structure which contains the host representation of the hdr content. both structures could coexist in a union, part of the rx or tx context. would that satisfy your complain? many thanks, bernard. > Bart. > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Oct 6, 2010 at 2:52 PM, Bernard Metzler <BMT@zurich.ibm.com> wrote: > > > linux-rdma-owner@vger.kernel.org wrote on 10/05/2010 06:10:45 PM: > > > On Tue, Oct 5, 2010 at 8:53 AM, Bernard Metzler <bmt@zurich.ibm.com> > wrote: > > > +} __attribute__((__packed__)); > > > > Apparently you make extensive use of the packed attribute. Please read > > this blog entry, in which it is explained why this is harmful: > > > > http://digitalvampire.org/blog/index.php/2006/07/31/why-you- > > shouldnt-use-__attribute__packed/ > > > right, thanks, i was not aware of that > big overhead. > but...there are reasons why we may need it _packed_: > (1) to get the size of the packet hdr > (2) to read/write the hdr content > > (1) is not harmful. (2) could be limited to > just one read/write operation per rx/tx path > and all other access could be done on a > structure which contains the host representation > of the hdr content. both structures could coexist > in a union, part of the rx or tx context. > would that satisfy your complain? Please have a look at e.g. the header file <scsi/srp.h>. As you can see in that header file, __attribute__((__packed__)) is only specified when absolutely necessary. Bart. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Oct 06, 2010 at 02:52:46PM +0200, Bernard Metzler wrote: > > > linux-rdma-owner@vger.kernel.org wrote on 10/05/2010 06:10:45 PM: > > > On Tue, Oct 5, 2010 at 8:53 AM, Bernard Metzler <bmt@zurich.ibm.com> > wrote: > > > +} __attribute__((__packed__)); > > > > Apparently you make extensive use of the packed attribute. Please read > > this blog entry, in which it is explained why this is harmful: > > > > http://digitalvampire.org/blog/index.php/2006/07/31/why-you- > > shouldnt-use-__attribute__packed/ > > > right, thanks, i was not aware of that > big overhead. > but...there are reasons why we may need it _packed_: > (1) to get the size of the packet hdr > (2) to read/write the hdr content It is actually a little more complicated than just this. I assume you are casting the structures over packet payloads? In this case you have to guarentee alignment (or used packed everywhere). Does iwarp have provisions for alignment? If so you can construct your bitfields using the alignment type, ie if iWarp guarantees 4 bytes then the biggest type you can use is u32 - then you can avoid using packed. Mind you, I'm not sure how to guarentee alignment when you consider all the possible sources of unalignment in the stack: TCP, IP, L2 stack ? Otherwise you need a strategy for dealing with unaligned data for portability. :( Jason -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 2010-10-06 at 11:25 -0600, Jason Gunthorpe wrote: > On Wed, Oct 06, 2010 at 02:52:46PM +0200, Bernard Metzler wrote: > It is actually a little more complicated than just this. I assume you > are casting the structures over packet payloads? In this case you > have to guarentee alignment (or used packed everywhere). Does iwarp > have provisions for alignment? If so you can construct your bitfields > using the alignment type, ie if iWarp guarantees 4 bytes then the > biggest type you can use is u32 - then you can avoid using packed. > > Mind you, I'm not sure how to guarentee alignment when you consider > all the possible sources of unalignment in the stack: TCP, IP, L2 stack ? You don't have to. The TCP stack, IIRC, requires the architecture to fix up misaligned accesses, or at least it used to. It will try to keep things aligned if possible -- see the comment above NET_IP_ALIGN in include/linux/skbuff.h. The structures in Bernard's header have all fields naturally aligned, so no need for the packed attribute and its baggage. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Oct 06, 2010 at 02:22:55PM -0400, David Dillow wrote: > On Wed, 2010-10-06 at 11:25 -0600, Jason Gunthorpe wrote: > > On Wed, Oct 06, 2010 at 02:52:46PM +0200, Bernard Metzler wrote: > > It is actually a little more complicated than just this. I assume you > > are casting the structures over packet payloads? In this case you > > have to guarentee alignment (or used packed everywhere). Does iwarp > > have provisions for alignment? If so you can construct your bitfields > > using the alignment type, ie if iWarp guarantees 4 bytes then the > > biggest type you can use is u32 - then you can avoid using packed. > > > > Mind you, I'm not sure how to guarentee alignment when you consider > > all the possible sources of unalignment in the stack: TCP, IP, L2 stack ? > > You don't have to. The TCP stack, IIRC, requires the architecture to fix > up misaligned accesses, or at least it used to. It will try to keep > things aligned if possible -- see the comment above NET_IP_ALIGN in > include/linux/skbuff.h. I was under the impression that this was just to align the IP header. There are quite a variety of options and additional stuff that can be inserted at each layer, VLAN for ethernet, IP option/extension headers, TCP option headers, etc, etc. I seem to remember that because of all this there are cases where processing unaligned payloads is unavoidable? Hence my question if iWarp has restrictions (particularly on TCP) that might guarentee alignment. > The structures in Bernard's header have all fields naturally > aligned, so no need for the packed attribute and its baggage. No, they arent: > +struct iwarp_rdma_write { > + struct iwarp_ctrl ctrl; > + __u32 sink_stag; > + __u64 sink_to; > +} __attribute__((__packed__)); For instance has sink_to unaligned if the maximum aligment of the payload is only guarenteed to be 4. The proper thing to do in this case is probably __attribute__((__packed__, __aligned__(4))) if that works the way I think.. :) Jason -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 2010-10-06 at 12:37 -0600, Jason Gunthorpe wrote: > On Wed, Oct 06, 2010 at 02:22:55PM -0400, David Dillow wrote: > > On Wed, 2010-10-06 at 11:25 -0600, Jason Gunthorpe wrote: > > > On Wed, Oct 06, 2010 at 02:52:46PM +0200, Bernard Metzler wrote: > > > It is actually a little more complicated than just this. I assume you > > > are casting the structures over packet payloads? In this case you > > > have to guarentee alignment (or used packed everywhere). Does iwarp > > > have provisions for alignment? If so you can construct your bitfields > > > using the alignment type, ie if iWarp guarantees 4 bytes then the > > > biggest type you can use is u32 - then you can avoid using packed. > > > > > > Mind you, I'm not sure how to guarentee alignment when you consider > > > all the possible sources of unalignment in the stack: TCP, IP, L2 stack ? > > > > You don't have to. The TCP stack, IIRC, requires the architecture to fix > > up misaligned accesses, or at least it used to. It will try to keep > > things aligned if possible -- see the comment above NET_IP_ALIGN in > > include/linux/skbuff.h. > > I was under the impression that this was just to align the IP > header. It is, and it assumes an ethernet header of 14 bytes. As you note, VLANs and everything else can mess with that, which is why the TCP stack requires the architecture to handle unaligned accesses. It's a bonus if iWarp can guarantee an alignment WRT the IP or TCP header, but not required. > > The structures in Bernard's header have all fields naturally > > aligned, so no need for the packed attribute and its baggage. > > No, they arent: > > > +struct iwarp_rdma_write { > > + struct iwarp_ctrl ctrl; > > + __u32 sink_stag; > > + __u64 sink_to; > > +} __attribute__((__packed__)); > > For instance has sink_to unaligned if the maximum aligment of the > payload is only guarenteed to be 4. The proper thing to do in this > case is probably __attribute__((__packed__, __aligned__(4))) if that > works the way I think.. :) struct iwarp_ctrl consists of 2 u16s, so sink_stag is aligned to 4 bytes from the start of the struct iwarp_rdma_write, and sink_to is at 8 bytes -- so they are naturally aligned with respect to the structure. The compiler will not add padding, so no need for the packed attribute. Note I'm not talking about alignment in memory -- that'll depend on where the data starts, and as I mentioned, the stack doesn't make any guarantees about that, relying on the architecture to fixup any misaligned accesses. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
David Dillow <dave@thedillows.org> wrote on 10/06/2010 09:31:44 PM: > On Wed, 2010-10-06 at 12:37 -0600, Jason Gunthorpe wrote: > > On Wed, Oct 06, 2010 at 02:22:55PM -0400, David Dillow wrote: > > > On Wed, 2010-10-06 at 11:25 -0600, Jason Gunthorpe wrote: > > > > On Wed, Oct 06, 2010 at 02:52:46PM +0200, Bernard Metzler wrote: > > > > It is actually a little more complicated than just this. I assume you > > > > are casting the structures over packet payloads? In this case you > > > > have to guarentee alignment (or used packed everywhere). Does iwarp > > > > have provisions for alignment? If so you can construct your bitfields > > > > using the alignment type, ie if iWarp guarantees 4 bytes then the > > > > biggest type you can use is u32 - then you can avoid using packed. > > > > > > > > Mind you, I'm not sure how to guarentee alignment when you consider > > > > all the possible sources of unalignment in the stack: TCP, IP,L2 stack ? > > > > > > You don't have to. The TCP stack, IIRC, requires the architecture to fix > > > up misaligned accesses, or at least it used to. It will try to keep > > > things aligned if possible -- see the comment above NET_IP_ALIGN in > > > include/linux/skbuff.h. > > > > I was under the impression that this was just to align the IP > > header. > > It is, and it assumes an ethernet header of 14 bytes. As you note, VLANs > and everything else can mess with that, which is why the TCP stack > requires the architecture to handle unaligned accesses. It's a bonus if > iWarp can guarantee an alignment WRT the IP or TCP header, but not > required. > > > > The structures in Bernard's header have all fields naturally > > > aligned, so no need for the packed attribute and its baggage. > > > > No, they arent: > > > > > +struct iwarp_rdma_write { > > > + struct iwarp_ctrl ctrl; > > > + __u32 sink_stag; > > > + __u64 sink_to; > > > +} __attribute__((__packed__)); > > > > For instance has sink_to unaligned if the maximum aligment of the > > payload is only guarenteed to be 4. The proper thing to do in this > > case is probably __attribute__((__packed__, __aligned__(4))) if that > > works the way I think.. :) > > struct iwarp_ctrl consists of 2 u16s, so sink_stag is aligned to 4 bytes > from the start of the struct iwarp_rdma_write, and sink_to is at 8 bytes > -- so they are naturally aligned with respect to the structure. The > compiler will not add padding, so no need for the packed attribute. > > Note I'm not talking about alignment in memory -- that'll depend on > where the data starts, and as I mentioned, the stack doesn't make any > guarantees about that, relying on the architecture to fixup any > misaligned accesses. agreed. all n-bytes are starting on a multipe of n-byte offset relative to the start of the structure. many thanks, bernard. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/infiniband/hw/siw/iwarp.h b/drivers/infiniband/hw/siw/iwarp.h new file mode 100644 index 0000000..762c1d3 --- /dev/null +++ b/drivers/infiniband/hw/siw/iwarp.h @@ -0,0 +1,324 @@ +/* + * Software iWARP device driver for Linux + * + * Authors: Bernard Metzler <bmt@zurich.ibm.com> + * Fredy Neeser <nfd@zurich.ibm.com> + * + * Copyright (c) 2008-2010, IBM Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of IBM nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _IWARP_H +#define _IWARP_H + +#include <rdma/rdma_user_cm.h> /* RDMA_MAX_PRIVATE_DATA */ +#include <linux/types.h> +#include <asm/byteorder.h> + + +#define RDMAP_VERSION 1 +#define DDP_VERSION 1 +#define MPA_REVISION_1 1 +#define MPA_MAX_PRIVDATA RDMA_MAX_PRIVATE_DATA +#define MPA_KEY_REQ "MPA ID Req Frame" +#define MPA_KEY_REP "MPA ID Rep Frame" + +struct mpa_rr_params { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u16 res:5, + r:1, + c:1, + m:1, + rev:8; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u16 m:1, + c:1, + r:1, + res:5, + rev:8; +#else +#error "Adjust your <asm/byteorder.h> defines" +#endif + __u16 pd_len; +}; + +/* + * MPA request/reply header + */ +struct mpa_rr { + __u8 key[16]; + struct mpa_rr_params params; +}; + +/* + * Don't change the layout/size of this struct! + */ +struct mpa_marker { + __u16 rsvd; + __u16 fpdu_hmd; /* FPDU header-marker distance (= MPA's FPDUPTR) */ +}; + +#define MPA_MARKER_SPACING 512 +#define MPA_HDR_SIZE 2 + +/* + * MPA marker size: + * - Standards-compliant marker insertion: Use sizeof(struct mpa_marker) + * - "Invisible markers" for testing sender's marker insertion + * without affecting receiver: Use 0 + */ +#define MPA_MARKER_SIZE sizeof(struct mpa_marker) + + +/* + * maximum MPA trailer + */ +struct mpa_trailer { + char pad[4]; + __u32 crc; +}; + +#define MPA_CRC_SIZE 4 + + +/* + * Common portion of iWARP headers (MPA, DDP, RDMAP) + * for any FPDU + */ +struct iwarp_ctrl { + __u16 mpa_len; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u16 dv:2, /* DDP Version */ + rsvd:4, /* DDP reserved, MBZ */ + l:1, /* DDP Last flag */ + t:1, /* DDP Tagged flag */ + opcode:4, /* RDMAP opcode */ + rsv:2, /* RDMAP reserved, MBZ */ + rv:2; /* RDMAP Version, 01 for IETF, 00 for RDMAC */ +#elif defined(__BIG_ENDIAN_BITFIELD) + __u16 t:1, /* DDP Tagged flag */ + l:1, /* DDP Last flag */ + rsvd:4, /* DDP reserved, MBZ */ + dv:2, /* DDP Version */ + rv:2, /* RDMAP Version, 01 for IETF, 00 for RDMAC */ + rsv:2, /* RDMAP reserved, MBZ */ + opcode:4; /* RDMAP opcode */ +#else +#error "Adjust your <asm/byteorder.h> defines" +#endif +}; + + +struct rdmap_terminate_ctrl { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u32 etype:4, + layer:4, + ecode:8, + rsvd1:5, + r:1, + d:1, + m:1, + rsvd2:8; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u32 layer:4, + etype:4, + ecode:8, + m:1, + d:1, + r:1, + rsvd1:5, + rsvd2:8; +#else +#error "Adjust your <asm/byteorder.h> defines" +#endif +}; + + +struct iwarp_rdma_write { + struct iwarp_ctrl ctrl; + __u32 sink_stag; + __u64 sink_to; +} __attribute__((__packed__)); + +struct iwarp_rdma_rreq { + struct iwarp_ctrl ctrl; + __u32 rsvd; + __u32 ddp_qn; + __u32 ddp_msn; + __u32 ddp_mo; + __u32 sink_stag; + __u64 sink_to; + __u32 read_size; + __u32 source_stag; + __u64 source_to; +} __attribute__((__packed__)); + +struct iwarp_rdma_rresp { + struct iwarp_ctrl ctrl; + __u32 sink_stag; + __u64 sink_to; +} __attribute__((__packed__)); + +struct iwarp_send { + struct iwarp_ctrl ctrl; + __u32 rsvd; + __u32 ddp_qn; + __u32 ddp_msn; + __u32 ddp_mo; +} __attribute__((__packed__)); + +struct iwarp_send_inv { + struct iwarp_ctrl ctrl; + __u32 inval_stag; + __u32 ddp_qn; + __u32 ddp_msn; + __u32 ddp_mo; +} __attribute__((__packed__)); + +struct iwarp_terminate { + struct iwarp_ctrl ctrl; + __u32 rsvd; + __u32 ddp_qn; + __u32 ddp_msn; + __u32 ddp_mo; + struct rdmap_terminate_ctrl term_ctrl; +} __attribute__((__packed__)); + + +/* + * Common portion of iWARP headers (MPA, DDP, RDMAP) + * for an FPDU carrying an untagged DDP segment + */ +struct iwarp_ctrl_untagged { + struct iwarp_ctrl ctrl; + __u32 rsvd; + __u32 ddp_qn; + __u32 ddp_msn; + __u32 ddp_mo; +} __attribute__((__packed__)); + +/* + * Common portion of iWARP headers (MPA, DDP, RDMAP) + * for an FPDU carrying a tagged DDP segment + */ +struct iwarp_ctrl_tagged { + struct iwarp_ctrl ctrl; + __u32 ddp_stag; + __u64 ddp_to; +} __attribute__((__packed__)); + +union iwarp_hdrs { + struct iwarp_ctrl ctrl; + struct iwarp_ctrl_untagged c_untagged; + struct iwarp_ctrl_tagged c_tagged; + struct iwarp_rdma_write rwrite; + struct iwarp_rdma_rreq rreq; + struct iwarp_rdma_rresp rresp; + struct iwarp_terminate terminate; + struct iwarp_send send; + struct iwarp_send_inv send_inv; +}; + + +#define MPA_MIN_FRAG ((sizeof(union iwarp_hdrs) + MPA_CRC_SIZE)) + +enum ddp_etype { + DDP_ETYPE_CATASTROPHIC = 0x0, + DDP_ETYPE_TAGGED_BUF = 0x1, + DDP_ETYPE_UNTAGGED_BUF = 0x2, + DDP_ETYPE_RSVD = 0x3 +}; + +enum ddp_ecode { + DDP_ECODE_CATASTROPHIC = 0x00, + /* Tagged Buffer Errors */ + DDP_ECODE_T_INVALID_STAG = 0x00, + DDP_ECODE_T_BASE_BOUNDS = 0x01, + DDP_ECODE_T_STAG_NOT_ASSOC = 0x02, + DDP_ECODE_T_TO_WRAP = 0x03, + DDP_ECODE_T_DDP_VERSION = 0x04, + /* Untagged Buffer Errors */ + DDP_ECODE_UT_INVALID_QN = 0x01, + DDP_ECODE_UT_INVALID_MSN_NOBUF = 0x02, + DDP_ECODE_UT_INVALID_MSN_RANGE = 0x03, + DDP_ECODE_UT_INVALID_MO = 0x04, + DDP_ECODE_UT_MSG_TOOLONG = 0x05, + DDP_ECODE_UT_DDP_VERSION = 0x06 +}; + + +enum rdmap_untagged_qn { + RDMAP_UNTAGGED_QN_SEND = 0, + RDMAP_UNTAGGED_QN_RDMA_READ = 1, + RDMAP_UNTAGGED_QN_TERMINATE = 2, + RDMAP_UNTAGGED_QN_COUNT = 3 +}; + +enum rdmap_etype { + RDMAP_ETYPE_CATASTROPHIC = 0x0, + RDMAP_ETYPE_REMOTE_PROTECTION = 0x1, + RDMAP_ETYPE_REMOTE_OPERATION = 0x2 +}; + +enum rdmap_ecode { + RDMAP_ECODE_INVALID_STAG = 0x00, + RDMAP_ECODE_BASE_BOUNDS = 0x01, + RDMAP_ECODE_ACCESS_RIGHTS = 0x02, + RDMAP_ECODE_STAG_NOT_ASSOC = 0x03, + RDMAP_ECODE_TO_WRAP = 0x04, + RDMAP_ECODE_RDMAP_VERSION = 0x05, + RDMAP_ECODE_UNEXPECTED_OPCODE = 0x06, + RDMAP_ECODE_CATASTROPHIC_STREAM = 0x07, + RDMAP_ECODE_CATASTROPHIC_GLOBAL = 0x08, + RDMAP_ECODE_STAG_NOT_INVALIDATE = 0x09, + RDMAP_ECODE_UNSPECIFIED = 0xff +}; + +enum rdmap_elayer { + RDMAP_ERROR_LAYER_RDMA = 0x00, + RDMAP_ERROR_LAYER_DDP = 0x01, + RDMAP_ERROR_LAYER_LLP = 0x02 /* eg., MPA */ +}; + +enum rdma_opcode { + RDMAP_RDMA_WRITE = 0x0, + RDMAP_RDMA_READ_REQ = 0x1, + RDMAP_RDMA_READ_RESP = 0x2, + RDMAP_SEND = 0x3, + RDMAP_SEND_INVAL = 0x4, + RDMAP_SEND_SE = 0x5, + RDMAP_SEND_SE_INVAL = 0x6, + RDMAP_TERMINATE = 0x7, + RDMAP_NOT_SUPPORTED = RDMAP_TERMINATE + 1 +}; + +#endif