Message ID | 1454264009-24094-8-git-send-email-wexu@redhat.com |
---|---|
State | New |
Headers | show |
On 02/01/2016 02:13 AM, wexu@redhat.com wrote: > From: Wei Xu <wexu@redhat.com> > > Normally it includes 2 typical way to handle a TCP control flag, bypass > and finalize, bypass means should be sent out directly, and finalize > means the packets should also be bypassed, and this should be done > after searching for the same connection packets in the pool and sending > all of them out, this is to avoid out of data. > > All the 'SYN' packets will be bypassed since this always begin a new' > connection, other flag such 'FIN/RST' will trigger a finalization, because > this normally happens upon a connection is going to be closed. > > Signed-off-by: Wei Xu <wexu@redhat.com> > --- > hw/net/virtio-net.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 66 insertions(+) > > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c > index 88fc4f8..b0987d0 100644 > --- a/hw/net/virtio-net.c > +++ b/hw/net/virtio-net.c > @@ -41,6 +41,12 @@ > > #define VIRTIO_HEADER 12 /* Virtio net header size */ > #define IP_OFFSET (VIRTIO_HEADER + sizeof(struct eth_header)) > + > +#define IP4_ADDR_OFFSET (IP_OFFSET + 12) /* ipv4 address start */ > +#define TCP4_OFFSET (IP_OFFSET + sizeof(struct ip_header)) /* tcp4 header */ > +#define TCP4_PORT_OFFSET TCP4_OFFSET /* tcp4 port offset */ > +#define IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */ > +#define TCP_PORT_SIZE 4 /* sport + dport */ > #define TCP_WINDOW 65535 > > /* IPv4 max payload, 16 bits in the header */ > @@ -1850,6 +1856,27 @@ static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain, > o_data, &o_ip->ip_len, MAX_IP4_PAYLOAD); > } > > + > +/* Pakcets with 'SYN' should bypass, other flag should be sent after drain > + * to prevent out of order */ > +static int virtio_net_rsc_parse_tcp_ctrl(uint8_t *ip, uint16_t offset) > +{ > + uint16_t tcp_flag; > + struct tcp_header *tcp; > + > + tcp = (struct tcp_header *)(ip + offset); > + tcp_flag = htons(tcp->th_offset_flags) & 0x3F; > + if (tcp_flag & TH_SYN) { > + return RSC_BYPASS; > + } > + > + if (tcp_flag & (TH_FIN | TH_URG | TH_RST)) { > + return RSC_FINAL; > + } > + > + return 0; > +} To avid breaking bisection, need to squash this into previous patches for a complete implementation of tcp coalescing. > + > static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc, > const uint8_t *buf, size_t size, VirtioNetCoalesce *coalesce) > { > @@ -1895,12 +1922,51 @@ static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc, > return virtio_net_rsc_cache_buf(chain, nc, buf, size); > } > > +/* Drain a connection data, this is to avoid out of order segments */ > +static size_t virtio_net_rsc_drain_one(NetRscChain *chain, NetClientState *nc, > + const uint8_t *buf, size_t size, uint16_t ip_start, > + uint16_t ip_size, uint16_t tcp_port, uint16_t port_size) > +{ > + NetRscSeg *seg, *nseg; > + > + QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { > + if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size) > + || memcmp(buf + tcp_port, seg->buf + tcp_port, port_size)) { Do you really mean "||" here? > + continue; > + } > + if ((chain->proto == ETH_P_IP) && seg->is_coalesced) { > + virtio_net_rsc_ipv4_checksum(seg); > + } > + > + virtio_net_do_receive(seg->nc, seg->buf, seg->size); > + > + QTAILQ_REMOVE(&chain->buffers, seg, next); > + g_free(seg->buf); > + g_free(seg); The above three or four lines looks like a duplication two or three times in the codes of previous patch. Need consider a new helper. > + break; > + } > + > + return virtio_net_do_receive(nc, buf, size); > +} > static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc, > const uint8_t *buf, size_t size) > { > + int32_t ret; > + struct ip_header *ip; > NetRscChain *chain; > > chain = (NetRscChain *)opq; > + ip = (struct ip_header *)(buf + IP_OFFSET); > + > + ret = virtio_net_rsc_parse_tcp_ctrl((uint8_t *)ip, > + (0xF & ip->ip_ver_len) << 2); This looks like a layer violation here. I think it should be done in virtio_net_rsc_roalesce_tcp(). > + if (RSC_BYPASS == ret) { > + return virtio_net_do_receive(nc, buf, size); > + } else if (RSC_FINAL == ret) { > + return virtio_net_rsc_drain_one(chain, nc, buf, size, IP4_ADDR_OFFSET, > + IP4_ADDR_SIZE, TCP4_PORT_OFFSET, TCP_PORT_SIZE); It's better for virtio_net_rsc_drain_one() itself to check the ip proto and switch to use v4 or v6 offset/size, instead of passing a long parameter list of OFFSET/SIZE macros. > + } > + > return virtio_net_rsc_callback(chain, nc, buf, size, > virtio_net_rsc_try_coalesce4); > }
On 02/01/2016 02:44 PM, Jason Wang wrote: > > On 02/01/2016 02:13 AM, wexu@redhat.com wrote: >> From: Wei Xu <wexu@redhat.com> >> >> Normally it includes 2 typical way to handle a TCP control flag, bypass >> and finalize, bypass means should be sent out directly, and finalize >> means the packets should also be bypassed, and this should be done >> after searching for the same connection packets in the pool and sending >> all of them out, this is to avoid out of data. >> >> All the 'SYN' packets will be bypassed since this always begin a new' >> connection, other flag such 'FIN/RST' will trigger a finalization, because >> this normally happens upon a connection is going to be closed. >> >> Signed-off-by: Wei Xu <wexu@redhat.com> >> --- >> hw/net/virtio-net.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 66 insertions(+) >> >> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c >> index 88fc4f8..b0987d0 100644 >> --- a/hw/net/virtio-net.c >> +++ b/hw/net/virtio-net.c >> @@ -41,6 +41,12 @@ >> >> #define VIRTIO_HEADER 12 /* Virtio net header size */ >> #define IP_OFFSET (VIRTIO_HEADER + sizeof(struct eth_header)) >> + >> +#define IP4_ADDR_OFFSET (IP_OFFSET + 12) /* ipv4 address start */ >> +#define TCP4_OFFSET (IP_OFFSET + sizeof(struct ip_header)) /* tcp4 header */ >> +#define TCP4_PORT_OFFSET TCP4_OFFSET /* tcp4 port offset */ >> +#define IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */ >> +#define TCP_PORT_SIZE 4 /* sport + dport */ >> #define TCP_WINDOW 65535 >> >> /* IPv4 max payload, 16 bits in the header */ >> @@ -1850,6 +1856,27 @@ static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain, >> o_data, &o_ip->ip_len, MAX_IP4_PAYLOAD); >> } >> >> + >> +/* Pakcets with 'SYN' should bypass, other flag should be sent after drain >> + * to prevent out of order */ >> +static int virtio_net_rsc_parse_tcp_ctrl(uint8_t *ip, uint16_t offset) >> +{ >> + uint16_t tcp_flag; >> + struct tcp_header *tcp; >> + >> + tcp = (struct tcp_header *)(ip + offset); >> + tcp_flag = htons(tcp->th_offset_flags) & 0x3F; >> + if (tcp_flag & TH_SYN) { >> + return RSC_BYPASS; >> + } >> + >> + if (tcp_flag & (TH_FIN | TH_URG | TH_RST)) { >> + return RSC_FINAL; >> + } >> + >> + return 0; >> +} > To avid breaking bisection, need to squash this into previous patches > for a complete implementation of tcp coalescing. OK. > >> + >> static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc, >> const uint8_t *buf, size_t size, VirtioNetCoalesce *coalesce) >> { >> @@ -1895,12 +1922,51 @@ static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc, >> return virtio_net_rsc_cache_buf(chain, nc, buf, size); >> } >> >> +/* Drain a connection data, this is to avoid out of order segments */ >> +static size_t virtio_net_rsc_drain_one(NetRscChain *chain, NetClientState *nc, >> + const uint8_t *buf, size_t size, uint16_t ip_start, >> + uint16_t ip_size, uint16_t tcp_port, uint16_t port_size) >> +{ >> + NetRscSeg *seg, *nseg; >> + >> + QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { >> + if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size) >> + || memcmp(buf + tcp_port, seg->buf + tcp_port, port_size)) { > Do you really mean "||" here? Oops, it's '&&' here. > >> + continue; >> + } >> + if ((chain->proto == ETH_P_IP) && seg->is_coalesced) { >> + virtio_net_rsc_ipv4_checksum(seg); >> + } >> + >> + virtio_net_do_receive(seg->nc, seg->buf, seg->size); >> + >> + QTAILQ_REMOVE(&chain->buffers, seg, next); >> + g_free(seg->buf); >> + g_free(seg); > The above three or four lines looks like a duplication two or three > times in the codes of previous patch. Need consider a new helper. OK. > >> + break; >> + } >> + >> + return virtio_net_do_receive(nc, buf, size); >> +} >> static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc, >> const uint8_t *buf, size_t size) >> { >> + int32_t ret; >> + struct ip_header *ip; >> NetRscChain *chain; >> >> chain = (NetRscChain *)opq; >> + ip = (struct ip_header *)(buf + IP_OFFSET); >> + >> + ret = virtio_net_rsc_parse_tcp_ctrl((uint8_t *)ip, >> + (0xF & ip->ip_ver_len) << 2); > This looks like a layer violation here. I think it should be done in > virtio_net_rsc_roalesce_tcp(). Good idea, will check it out. > >> + if (RSC_BYPASS == ret) { >> + return virtio_net_do_receive(nc, buf, size); >> + } else if (RSC_FINAL == ret) { >> + return virtio_net_rsc_drain_one(chain, nc, buf, size, IP4_ADDR_OFFSET, >> + IP4_ADDR_SIZE, TCP4_PORT_OFFSET, TCP_PORT_SIZE); > It's better for virtio_net_rsc_drain_one() itself to check the ip proto > and switch to use v4 or v6 offset/size, instead of passing a long > parameter list of OFFSET/SIZE macros. Yes, is considering optimizing it. > >> + } >> + >> return virtio_net_rsc_callback(chain, nc, buf, size, >> virtio_net_rsc_try_coalesce4); >> } >
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 88fc4f8..b0987d0 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -41,6 +41,12 @@ #define VIRTIO_HEADER 12 /* Virtio net header size */ #define IP_OFFSET (VIRTIO_HEADER + sizeof(struct eth_header)) + +#define IP4_ADDR_OFFSET (IP_OFFSET + 12) /* ipv4 address start */ +#define TCP4_OFFSET (IP_OFFSET + sizeof(struct ip_header)) /* tcp4 header */ +#define TCP4_PORT_OFFSET TCP4_OFFSET /* tcp4 port offset */ +#define IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */ +#define TCP_PORT_SIZE 4 /* sport + dport */ #define TCP_WINDOW 65535 /* IPv4 max payload, 16 bits in the header */ @@ -1850,6 +1856,27 @@ static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain, o_data, &o_ip->ip_len, MAX_IP4_PAYLOAD); } + +/* Pakcets with 'SYN' should bypass, other flag should be sent after drain + * to prevent out of order */ +static int virtio_net_rsc_parse_tcp_ctrl(uint8_t *ip, uint16_t offset) +{ + uint16_t tcp_flag; + struct tcp_header *tcp; + + tcp = (struct tcp_header *)(ip + offset); + tcp_flag = htons(tcp->th_offset_flags) & 0x3F; + if (tcp_flag & TH_SYN) { + return RSC_BYPASS; + } + + if (tcp_flag & (TH_FIN | TH_URG | TH_RST)) { + return RSC_FINAL; + } + + return 0; +} + static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc, const uint8_t *buf, size_t size, VirtioNetCoalesce *coalesce) { @@ -1895,12 +1922,51 @@ static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc, return virtio_net_rsc_cache_buf(chain, nc, buf, size); } +/* Drain a connection data, this is to avoid out of order segments */ +static size_t virtio_net_rsc_drain_one(NetRscChain *chain, NetClientState *nc, + const uint8_t *buf, size_t size, uint16_t ip_start, + uint16_t ip_size, uint16_t tcp_port, uint16_t port_size) +{ + NetRscSeg *seg, *nseg; + + QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { + if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size) + || memcmp(buf + tcp_port, seg->buf + tcp_port, port_size)) { + continue; + } + if ((chain->proto == ETH_P_IP) && seg->is_coalesced) { + virtio_net_rsc_ipv4_checksum(seg); + } + + virtio_net_do_receive(seg->nc, seg->buf, seg->size); + + QTAILQ_REMOVE(&chain->buffers, seg, next); + g_free(seg->buf); + g_free(seg); + break; + } + + return virtio_net_do_receive(nc, buf, size); +} static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc, const uint8_t *buf, size_t size) { + int32_t ret; + struct ip_header *ip; NetRscChain *chain; chain = (NetRscChain *)opq; + ip = (struct ip_header *)(buf + IP_OFFSET); + + ret = virtio_net_rsc_parse_tcp_ctrl((uint8_t *)ip, + (0xF & ip->ip_ver_len) << 2); + if (RSC_BYPASS == ret) { + return virtio_net_do_receive(nc, buf, size); + } else if (RSC_FINAL == ret) { + return virtio_net_rsc_drain_one(chain, nc, buf, size, IP4_ADDR_OFFSET, + IP4_ADDR_SIZE, TCP4_PORT_OFFSET, TCP_PORT_SIZE); + } + return virtio_net_rsc_callback(chain, nc, buf, size, virtio_net_rsc_try_coalesce4); }