diff mbox

[RFC,V2,3/3] filter-rewriter: rewrite tcp packet to keep secondary connection

Message ID 1467440540-6630-4-git-send-email-zhangchen.fnst@cn.fujitsu.com
State New
Headers show

Commit Message

Zhang Chen July 2, 2016, 6:22 a.m. UTC
We rewrite the TCP packets that the secondary guest receives and sends,
for the case where the COLO guest is a TCP server.

First, the client starts a TCP handshake with a packet carrying
seq=client_seq, ack=0, flag=SYN. The COLO primary guest gets this packet
and mirrors it (filter-mirror) to the secondary guest, which receives it
through filter-redirector.
Then the primary guest responds with
(seq=primary_seq,ack=client_seq+1,flag=ACK|SYN)
and the secondary guest responds with
(seq=secondary_seq,ack=client_seq+1,flag=ACK|SYN).
At this point filter-rewriter saves secondary_seq in its TCP connection.
To finish the handshake, the client sends
(seq=client_seq+1,ack=primary_seq+1,flag=ACK).
Here filter-rewriter learns primary_seq, rewrites the ack from primary_seq+1
to secondary_seq+1 and recalculates the checksum, so the secondary TCP
connection stays valid.

When data packets are sent and received:
the client sends pkt(seq=client_seq+1+data_len,ack=primary_seq+1,flag=ACK|PSH);
filter-rewriter rewrites the ack and sends the packet to the secondary guest.

The primary guest responds with
(seq=primary_seq+1,ack=client_seq+1+data_len,flag=ACK)
and the secondary guest responds with
(seq=secondary_seq+1,ack=client_seq+1+data_len,flag=ACK).
We rewrite the secondary guest's seq from secondary_seq+1 to primary_seq+1,
so the TCP connection stays valid.

In the code we use offset (= secondary_seq - primary_seq)
to rewrite seq or ack:
handle_primary_tcp_pkt: tcp_pkt->th_ack += offset;
handle_secondary_tcp_pkt: tcp_pkt->th_seq -= offset;

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 net/colo-base.h       |   2 +
 net/filter-rewriter.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++-
 trace-events          |   5 +++
 3 files changed, 115 insertions(+), 2 deletions(-)

Comments

Jason Wang July 4, 2016, 3:16 a.m. UTC | #1
On 2016年07月02日 14:22, Zhang Chen wrote:
> We will rewrite tcp packet secondary received and sent.
> When colo guest is a tcp server.
>
> Firstly, client start a tcp handshake. the packet's seq=client_seq,
> ack=0,flag=SYN. COLO primary guest get this pkt and mirror(filter-mirror)
> to secondary guest, secondary get it use filter-redirector.
> Then,primary guest response pkt
> (seq=primary_seq,ack=client_seq+1,flag=ACK|SYN).
> secondary guest response pkt
> (seq=secondary_seq,ack=client_seq+1,flag=ACK|SYN).
> In here,we use filter-rewriter save the secondary_seq to it's tcp connection.
> Finally handshake,client send pkt
> (seq=client_seq+1,ack=primary_seq+1,flag=ACK).
> Here,filter-rewriter can get primary_seq, and rewrite ack from primary_seq+1
> to secondary_seq+1, recalculate checksum. So the secondary tcp connection
> kept good.
>
> When we send/recv packet.
> client send pkt(seq=client_seq+1+data_len,ack=primary_seq+1,flag=ACK|PSH).
> filter-rewriter rewrite ack and send to secondary guest.
>
> primary guest response pkt
> (seq=primary_seq+1,ack=client_seq+1+data_len,flag=ACK)
> secondary guest response pkt
> (seq=secondary_seq+1,ack=client_seq+1+data_len,flag=ACK)
> we rewrite secondary guest seq from secondary_seq+1 to primary_seq+1.
> So tcp connection kept good.
>
> In code We use offset( = secondary_seq - primary_seq )
> to rewrite seq or ack.
> handle_primary_tcp_pkt: tcp_pkt->th_ack += offset;
> handle_secondary_tcp_pkt: tcp_pkt->th_seq -= offset;
>
> Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> ---
>   net/colo-base.h       |   2 +
>   net/filter-rewriter.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++-
>   trace-events          |   5 +++
>   3 files changed, 115 insertions(+), 2 deletions(-)
>
> diff --git a/net/colo-base.h b/net/colo-base.h
> index 62460c5..7b32648 100644
> --- a/net/colo-base.h
> +++ b/net/colo-base.h
> @@ -71,6 +71,8 @@ typedef struct Connection {
>       uint8_t ip_proto;
>       /* be used by filter-rewriter */
>       colo_conn_state state;
> +    /* offset = secondary_seq - primary_seq */
> +    tcp_seq  offset;

Fail to find the definition of 'tcp_seq'.

>   } Connection;
>   
>   uint32_t connection_key_hash(const void *opaque);
> diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
> index c38ab24..9f63c75 100644
> --- a/net/filter-rewriter.c
> +++ b/net/filter-rewriter.c
> @@ -21,6 +21,7 @@
>   #include "qemu/main-loop.h"
>   #include "qemu/iov.h"
>   #include "net/checksum.h"
> +#include "trace.h"
>   
>   #define FILTER_COLO_REWRITER(obj) \
>       OBJECT_CHECK(RewriterState, (obj), TYPE_FILTER_REWRITER)
> @@ -64,6 +65,91 @@ static int is_tcp_packet(Packet *pkt)
>       }
>   }
>   
> +/* handle tcp packet from primary guest */
> +static int handle_primary_tcp_pkt(NetFilterState *nf,
> +                                  Connection *conn,
> +                                  Packet *pkt)
> +{
> +    struct tcphdr *tcp_pkt;
> +    static int syn_flag;
> +
> +    tcp_pkt = (struct tcphdr *)pkt->transport_layer;
> +    if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
> +        char *sdebug, *ddebug;
> +        sdebug = strdup(inet_ntoa(pkt->ip->ip_src));
> +        ddebug = strdup(inet_ntoa(pkt->ip->ip_dst));
> +        trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug,
> +                    ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
> +                    tcp_pkt->th_flags);
> +        trace_colo_filter_rewriter_conn_offset(conn->offset);
> +        g_free(sdebug);
> +        g_free(ddebug);
> +    }
> +
> +    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
> +        /*
> +         * this flag update offset func run oncs

typo?

> +         * in independent tcp connection
> +         */
> +        syn_flag = 1;

Does this really work if you have more than one TCP connection? You
probably need a conn->syn_flag.

> +    }
> +
> +    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) {
> +        if (syn_flag) {
> +            /* offset = secondary_seq - primary seq */
> +            conn->offset -= (ntohl(tcp_pkt->th_ack));
> +            syn_flag = 0;
> +
> +        }
> +        /* handle packets to the secondary from the primary */
> +        tcp_pkt->th_ack = htonl(ntohl(tcp_pkt->th_ack) + conn->offset + 1);

Maybe I'm missing something, but why +1 here?
Zhang Chen July 4, 2016, 8:59 a.m. UTC | #2
On 07/04/2016 11:16 AM, Jason Wang wrote:
>
>
> On 2016年07月02日 14:22, Zhang Chen wrote:
>> We will rewrite tcp packet secondary received and sent.
>> When colo guest is a tcp server.
>>
>> Firstly, client start a tcp handshake. the packet's seq=client_seq,
>> ack=0,flag=SYN. COLO primary guest get this pkt and 
>> mirror(filter-mirror)
>> to secondary guest, secondary get it use filter-redirector.
>> Then,primary guest response pkt
>> (seq=primary_seq,ack=client_seq+1,flag=ACK|SYN).
>> secondary guest response pkt
>> (seq=secondary_seq,ack=client_seq+1,flag=ACK|SYN).
>> In here,we use filter-rewriter save the secondary_seq to it's tcp 
>> connection.
>> Finally handshake,client send pkt
>> (seq=client_seq+1,ack=primary_seq+1,flag=ACK).
>> Here,filter-rewriter can get primary_seq, and rewrite ack from 
>> primary_seq+1
>> to secondary_seq+1, recalculate checksum. So the secondary tcp 
>> connection
>> kept good.
>>
>> When we send/recv packet.
>> client send 
>> pkt(seq=client_seq+1+data_len,ack=primary_seq+1,flag=ACK|PSH).
>> filter-rewriter rewrite ack and send to secondary guest.
>>
>> primary guest response pkt
>> (seq=primary_seq+1,ack=client_seq+1+data_len,flag=ACK)
>> secondary guest response pkt
>> (seq=secondary_seq+1,ack=client_seq+1+data_len,flag=ACK)
>> we rewrite secondary guest seq from secondary_seq+1 to primary_seq+1.
>> So tcp connection kept good.
>>
>> In code We use offset( = secondary_seq - primary_seq )
>> to rewrite seq or ack.
>> handle_primary_tcp_pkt: tcp_pkt->th_ack += offset;
>> handle_secondary_tcp_pkt: tcp_pkt->th_seq -= offset;
>>
>> Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
>> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>> ---
>>   net/colo-base.h       |   2 +
>>   net/filter-rewriter.c | 110 
>> +++++++++++++++++++++++++++++++++++++++++++++++++-
>>   trace-events          |   5 +++
>>   3 files changed, 115 insertions(+), 2 deletions(-)
>>
>> diff --git a/net/colo-base.h b/net/colo-base.h
>> index 62460c5..7b32648 100644
>> --- a/net/colo-base.h
>> +++ b/net/colo-base.h
>> @@ -71,6 +71,8 @@ typedef struct Connection {
>>       uint8_t ip_proto;
>>       /* be used by filter-rewriter */
>>       colo_conn_state state;
>> +    /* offset = secondary_seq - primary_seq */
>> +    tcp_seq  offset;
>
> Fail to find the definition of 'tcp_seq'.
>

It is defined in slirp/tcp.h:
typedef    uint32_t tcp_seq;

We include this header in colo-base.h.


>>   } Connection;
>>     uint32_t connection_key_hash(const void *opaque);
>> diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
>> index c38ab24..9f63c75 100644
>> --- a/net/filter-rewriter.c
>> +++ b/net/filter-rewriter.c
>> @@ -21,6 +21,7 @@
>>   #include "qemu/main-loop.h"
>>   #include "qemu/iov.h"
>>   #include "net/checksum.h"
>> +#include "trace.h"
>>     #define FILTER_COLO_REWRITER(obj) \
>>       OBJECT_CHECK(RewriterState, (obj), TYPE_FILTER_REWRITER)
>> @@ -64,6 +65,91 @@ static int is_tcp_packet(Packet *pkt)
>>       }
>>   }
>>   +/* handle tcp packet from primary guest */
>> +static int handle_primary_tcp_pkt(NetFilterState *nf,
>> +                                  Connection *conn,
>> +                                  Packet *pkt)
>> +{
>> +    struct tcphdr *tcp_pkt;
>> +    static int syn_flag;
>> +
>> +    tcp_pkt = (struct tcphdr *)pkt->transport_layer;
>> +    if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
>> +        char *sdebug, *ddebug;
>> +        sdebug = strdup(inet_ntoa(pkt->ip->ip_src));
>> +        ddebug = strdup(inet_ntoa(pkt->ip->ip_dst));
>> +        trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug,
>> +                    ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
>> +                    tcp_pkt->th_flags);
>> + trace_colo_filter_rewriter_conn_offset(conn->offset);
>> +        g_free(sdebug);
>> +        g_free(ddebug);
>> +    }
>> +
>> +    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
>> +        /*
>> +         * this flag update offset func run oncs
>
> typo?

s/oncs/once/

>
>> +         * in independent tcp connection
>> +         */
>> +        syn_flag = 1;
>
> Does this really work if you have more than one tcp connections? You 
> probably need a conn->syn_flag.

Good catch...
I will fix it in next.


>
>> +    }
>> +
>> +    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) {
>> +        if (syn_flag) {
>> +            /* offset = secondary_seq - primary seq */
>> +            conn->offset -= (ntohl(tcp_pkt->th_ack));
>> +            syn_flag = 0;
>> +
>> +        }
>> +        /* handle packets to the secondary from the primary */
>> +        tcp_pkt->th_ack = htonl(ntohl(tcp_pkt->th_ack) + 
>> conn->offset + 1);
>
> Maybe I miss something, but why +1 here?
>
>

No, I missed something.
+            /* offset = secondary_seq - primary seq */
+            conn->offset -= (ntohl(tcp_pkt->th_ack));
should be
+            conn->offset -= (ntohl(tcp_pkt->th_ack) - 1);

(ntohl(tcp_pkt->th_ack) - 1) is the primary seq, so the ack rewrite no longer needs the + 1:

+        tcp_pkt->th_ack = htonl(ntohl(tcp_pkt->th_ack) + conn->offset);

I will fix it.

Thanks
Zhang Chen
>
> .
>
diff mbox

Patch

diff --git a/net/colo-base.h b/net/colo-base.h
index 62460c5..7b32648 100644
--- a/net/colo-base.h
+++ b/net/colo-base.h
@@ -71,6 +71,8 @@  typedef struct Connection {
     uint8_t ip_proto;
     /* be used by filter-rewriter */
     colo_conn_state state;
+    /* offset = secondary_seq - primary_seq */
+    tcp_seq  offset;
 } Connection;
 
 uint32_t connection_key_hash(const void *opaque);
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
index c38ab24..9f63c75 100644
--- a/net/filter-rewriter.c
+++ b/net/filter-rewriter.c
@@ -21,6 +21,7 @@ 
 #include "qemu/main-loop.h"
 #include "qemu/iov.h"
 #include "net/checksum.h"
+#include "trace.h"
 
 #define FILTER_COLO_REWRITER(obj) \
     OBJECT_CHECK(RewriterState, (obj), TYPE_FILTER_REWRITER)
@@ -64,6 +65,91 @@  static int is_tcp_packet(Packet *pkt)
     }
 }
 
+/* Primary-side TCP packet: adjust th_ack by conn->offset; returns 0 */
+static int handle_primary_tcp_pkt(NetFilterState *nf,
+                                  Connection *conn,
+                                  Packet *pkt)
+{
+    struct tcphdr *tcp_pkt;
+    static int syn_flag;
+
+    tcp_pkt = (struct tcphdr *)pkt->transport_layer;
+    if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
+        char *sdebug, *ddebug;
+        sdebug = strdup(inet_ntoa(pkt->ip->ip_src));
+        ddebug = strdup(inet_ntoa(pkt->ip->ip_dst));
+        trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug,
+                    ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
+                    tcp_pkt->th_flags);
+        trace_colo_filter_rewriter_conn_offset(conn->offset);
+        g_free(sdebug);
+        g_free(ddebug);
+    }
+
+    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
+        /*
+         * SYN without ACK: arm the one-time offset update below.
+         * NOTE(review): syn_flag is static, so all connections share it.
+         */
+        syn_flag = 1;
+    }
+
+    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) {
+        if (syn_flag) {
+            /* first ACK after SYN: subtract its ack from conn->offset once */
+            conn->offset -= (ntohl(tcp_pkt->th_ack));
+            syn_flag = 0;
+
+        }
+        /* add conn->offset + 1 to the ack, then redo the checksum */
+        tcp_pkt->th_ack = htonl(ntohl(tcp_pkt->th_ack) + conn->offset + 1);
+
+        net_checksum_calculate((uint8_t *)pkt->data, pkt->size);
+    }
+
+    return 0;
+}
+
+/* Secondary-side TCP packet: adjust th_seq by conn->offset; returns 0 */
+static int handle_secondary_tcp_pkt(NetFilterState *nf,
+                                    Connection *conn,
+                                    Packet *pkt)
+{
+    struct tcphdr *tcp_pkt;
+
+    tcp_pkt = (struct tcphdr *)pkt->transport_layer;
+
+    if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
+        char *sdebug, *ddebug;
+        sdebug = strdup(inet_ntoa(pkt->ip->ip_src));
+        ddebug = strdup(inet_ntoa(pkt->ip->ip_dst));
+        trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug,
+                    ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
+                    tcp_pkt->th_flags);
+        trace_colo_filter_rewriter_conn_offset(conn->offset);
+        g_free(sdebug);
+        g_free(ddebug);
+    }
+
+    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) {
+        /*
+         * SYN|ACK: store this packet's seq in conn->offset for now;
+         * handle_primary_tcp_pkt later subtracts the ack value it sees,
+         * turning it into the seq difference used for rewriting.
+         */
+        conn->offset = ntohl(tcp_pkt->th_seq);
+    }
+
+    if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) {
+        /* subtract conn->offset + 1 from the seq, then redo the checksum */
+        tcp_pkt->th_seq = htonl(ntohl(tcp_pkt->th_seq) - conn->offset - 1);
+
+        net_checksum_calculate((uint8_t *)pkt->data, pkt->size);
+    }
+
+    return 0;
+}
+
 static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
                                          NetClientState *sender,
                                          unsigned flags,
@@ -105,10 +191,30 @@  static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
 
         if (sender == nf->netdev) {
             /* NET_FILTER_DIRECTION_TX */
-            /* handle_primary_tcp_pkt */
+            if (!handle_primary_tcp_pkt(nf, conn, pkt)) {
+                qemu_net_queue_send(s->incoming_queue, sender, 0,
+                (const uint8_t *)pkt->data, pkt->size, NULL);
+                packet_destroy(pkt, NULL);
+                pkt = NULL;
+                /*
+                 * We block the packet here,after rewrite pkt
+                 * and will send it
+                 */
+                return 1;
+            }
         } else {
             /* NET_FILTER_DIRECTION_RX */
-            /* handle_secondary_tcp_pkt */
+            if (!handle_secondary_tcp_pkt(nf, conn, pkt)) {
+                qemu_net_queue_send(s->incoming_queue, sender, 0,
+                (const uint8_t *)pkt->data, pkt->size, NULL);
+                packet_destroy(pkt, NULL);
+                pkt = NULL;
+                /*
+                 * We block the packet here,after rewrite pkt
+                 * and will send it
+                 */
+                return 1;
+            }
         }
     }
 
diff --git a/trace-events b/trace-events
index 6686cdf..5ac56f6 100644
--- a/trace-events
+++ b/trace-events
@@ -1927,3 +1927,8 @@  colo_compare_icmp_miscompare_mtu(const char *sta, int size) ": %s  %d"
 colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s"
 colo_old_packet_check_found(int64_t old_time) "%" PRId64
 colo_compare_miscompare(void) ""
+
+# net/filter-rewriter.c
+colo_filter_rewriter_debug(void) ""
+colo_filter_rewriter_pkt_info(const char *func, const char *src, const char *dst, uint32_t seq, uint32_t ack, uint32_t flag) "%s: src/dst: %s/%s p: seq/ack=%u/%u  flags=%x\n"
+colo_filter_rewriter_conn_offset(uint32_t offset) ": offset=%u\n"