@@ -22,6 +22,9 @@
#include <linux/ip.h>
#include <linux/ipv6.h> /* for struct ipv6hdr */
#include <net/ipv6.h>
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#endif
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netfilter/nf_conntrack.h>
#endif
@@ -105,8 +108,10 @@ extern int ip_vs_conn_tab_size;
struct ip_vs_iphdr {
- int len;
- __u8 protocol;
+ __s32 len; /* offset where to find next header */
+ __s32 flags; /* flags output argument of ipv6_find_hdr() */
+ __u16 fragoffs; /* fragment offset output of ipv6_find_hdr() */
+ __s16 protocol; /* signed: stores ipv6_find_hdr()'s return value */
union nf_inet_addr saddr;
union nf_inet_addr daddr;
};
@@ -132,6 +137,59 @@ ip_vs_fill_iphdr(int af, const void *nh, struct ip_vs_iphdr *iphdr)
}
}
+/*
+ * ip_vs_fill_iph_skb - fill @iphdr from the network header of @skb
+ *
+ * AF_INET6 (only with CONFIG_IP_VS_IPV6): protocol, len, fragoffs and flags
+ * are produced by ipv6_find_hdr(), started at offset 0 with target -1.
+ * protocol may come back negative; callers have to check for that.
+ * Any other af is treated as IPv4: len is ihl * 4, protocol is the header's
+ * protocol field and fragoffs/flags stay 0.
+ * saddr/daddr are copied from the base IP header in both cases.
+ */
+static inline void
+ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, struct ip_vs_iphdr *iphdr)
+{
+ iphdr->len = 0;
+ iphdr->flags = 0;
+ iphdr->fragoffs = 0; /* the IPv4 branch never writes it */
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6) {
+ const struct ipv6hdr *iph = (struct ipv6hdr *)skb_network_header(skb);
+
+ iphdr->protocol = ipv6_find_hdr(skb, &iphdr->len, -1,
+ &iphdr->fragoffs,
+ &iphdr->flags);
+ iphdr->saddr.in6 = iph->saddr;
+ iphdr->daddr.in6 = iph->daddr;
+ } else
+#endif
+ {
+ const struct iphdr *iph = (struct iphdr *)skb_network_header(skb);
+ iphdr->len = iph->ihl * 4;
+ iphdr->protocol = iph->protocol;
+ iphdr->saddr.ip = iph->saddr;
+ iphdr->daddr.ip = iph->daddr;
+ }
+}
+
+/*
+ * Return the reassembled skb attached to @skb, or NULL when the kernel is
+ * built without NF_DEFRAG_IPV6. Both variants must carry the same name so
+ * that callers build in either configuration.
+ */
+#if defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
+static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb)
+{
+ return skb->nfct_reasm;
+}
+#else
+static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb)
+{
+ return NULL;
+}
+#endif
+
static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst,
const union nf_inet_addr *src)
{
@@ -412,7 +453,7 @@ struct ip_vs_protocol {
const struct sk_buff *skb,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
- int inverse);
+ int inverse, unsigned short foffs);
int (*snat_handler)(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp);
@@ -1001,7 +1042,7 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
- int inverse);
+ int inverse, unsigned short foffs);
/* put back the conn without restarting its timer */
static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
@@ -432,13 +432,16 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
struct ip_vs_conn *
ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
- const struct ip_vs_iphdr *iph,
- unsigned int proto_off, int inverse)
+ const struct ip_vs_iphdr *iph, unsigned int proto_off,
+ int inverse, unsigned short foffs)
{
struct ip_vs_conn_param p;
+ /* With a fragment offset the lookup data is read from the reassembled skb */
+ if (unlikely(foffs))
+ skb = skb_nfct_reasm(skb);
if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p))
return NULL;
return ip_vs_conn_out_get(&p);
}
@@ -235,7 +235,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
union nf_inet_addr snet; /* source network of the client,
after masking */
- ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
+ ip_vs_fill_iph_skb(svc->af, skb, &iph);
/* Mask saddr with the netmask to adjust template granularity */
#ifdef CONFIG_IP_VS_IPV6
@@ -402,7 +402,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
unsigned int flags;
*ignored = 1;
- ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
+ ip_vs_fill_iph_skb(svc->af, skb, &iph);
pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
if (pptr == NULL)
return NULL;
@@ -506,7 +506,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
int unicast;
#endif
- ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
+ ip_vs_fill_iph_skb(svc->af, skb, &iph);
pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
if (pptr == NULL) {
@@ -654,14 +654,6 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
return err;
}
-#ifdef CONFIG_IP_VS_IPV6
-static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
-{
- /* TODO IPv6: Find out what to do here for IPv6 */
- return 0;
-}
-#endif
-
static int ip_vs_route_me_harder(int af, struct sk_buff *skb)
{
#ifdef CONFIG_IP_VS_IPV6
@@ -819,6 +811,11 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
ip_vs_out_stats(cp, skb);
skb->ipvs_property = 1;
+ IP_VS_DBG(1, " ICMP(%p) response set track(%x) %pI6c#%d %pI6c#%d %pI6c#%d fw:%d len:%d\n",
+ skb, cp->flags & IP_VS_CONN_F_NFCT,
+ &cp->caddr.in6, ntohs(cp->cport),
+ &cp->vaddr.in6, ntohs(cp->vport),
+ &cp->daddr.in6, ntohs(cp->dport), cp->fwmark, skb->len);
if (!(cp->flags & IP_VS_CONN_F_NFCT))
ip_vs_notrack(skb);
else
@@ -902,7 +899,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
ip_vs_fill_iphdr(AF_INET, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
+ cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1, 0);
if (!cp)
return NF_ACCEPT;
@@ -913,36 +910,34 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
#ifdef CONFIG_IP_VS_IPV6
static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
- unsigned int hooknum)
+ unsigned int hooknum, struct ip_vs_iphdr *ipvsh,
+ unsigned short fragoffs)
{
- struct ipv6hdr *iph;
struct icmp6hdr _icmph, *ic;
- struct ipv6hdr _ciph, *cih; /* The ip header contained
+ struct ipv6hdr _ip6, *ip6; /* The ip header contained
within the ICMP */
- struct ip_vs_iphdr ciph;
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
- unsigned int offset;
union nf_inet_addr snet;
*related = 1;
- /* reassemble IP fragments */
- if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
- if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
- return NF_STOLEN;
- }
-
- iph = ipv6_hdr(skb);
- offset = sizeof(struct ipv6hdr);
- ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+ /*
+ * Fragment is before ICMP header which tells us that this is not an
+ * error message since they can't be fragmented.
+ */
+ if (unlikely(fragoffs)) {
+ IP_VS_DBG(1,"Outgoing ICMPv6 frag(%d) %pI6c->%pI6c offs:%d\n",
+ fragoffs, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
+ ipvsh->fragoffs);
+ ic = skb_header_pointer(skb_nfct_reasm(skb), ipvsh->len,
+ sizeof(_icmph), &_icmph);
+ } else
+ ic = skb_header_pointer(skb, ipvsh->len, sizeof(_icmph),
+ &_icmph);
if (ic == NULL)
return NF_DROP;
- IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) %pI6->%pI6\n",
- ic->icmp6_type, ntohs(icmpv6_id(ic)),
- &iph->saddr, &iph->daddr);
-
/*
* Work through seeing if this is for us.
* These checks are supposed to be in an order that means easy
@@ -956,36 +951,54 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
*related = 0;
return NF_ACCEPT;
}
-
- /* Now find the contained IP header */
- offset += sizeof(_icmph);
- cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
- if (cih == NULL)
- return NF_ACCEPT; /* The packet looks wrong, ignore */
-
- pp = ip_vs_proto_get(cih->nexthdr);
- if (!pp)
+ if (unlikely(fragoffs)) {
+ IP_VS_DBG(1, "***ERROR*** Don't frag an ICMPv6 Error(%d,%d)"
+ " %pI6->%pI6\n",
+ ic->icmp6_type, ntohs(icmpv6_id(ic)),
+ &ipvsh->saddr, &ipvsh->daddr);
return NF_ACCEPT;
+ }
- /* Is the embedded protocol header present? */
- /* TODO: we don't support fragmentation at the moment anyways */
- if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
- return NF_ACCEPT;
+ IP_VS_DBG(1, "Outgoing ICMPv6 Error (%d,%d) %pI6->%pI6\n",
+ ic->icmp6_type, ntohs(icmpv6_id(ic)),
+ &ipvsh->saddr, &ipvsh->daddr);
- IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
- "Checking outgoing ICMPv6 for");
+ /* Now find the contained IP header after icmp hdr */
+ ipvsh->len += sizeof(_icmph);
+ ip6 = skb_header_pointer(skb, ipvsh->len, sizeof(_ip6), &_ip6);
+ if (ip6 == NULL)
+ return NF_ACCEPT; /* The packet looks wrong, ignore */
+ ipvsh->protocol = ipv6_find_hdr(skb, &ipvsh->len, -1,
+ &ipvsh->fragoffs, &ipvsh->flags);
- offset += sizeof(struct ipv6hdr);
+ /*
+ * Is the embedded protocol header present?
+ * If not we can't do very much, and if it's an error on a fragment
+ * we don't have the history anyway..
+ */
+ pp = ip_vs_proto_get(ipvsh->protocol);
+ if (!pp || (ipvsh->protocol < 0))
+ return NF_ACCEPT;
+ /* fill the rest of ipvsh */
+ ipvsh->saddr.in6 = ip6->saddr;
+ ipvsh->daddr.in6 = ip6->daddr;
- ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
+ cp = pp->conn_out_get(AF_INET6, skb, ipvsh, ipvsh->len, 1, 0);
if (!cp)
return NF_ACCEPT;
+ {
+ __be16 _ports[2], *pptr;
+
+ pptr = skb_header_pointer(skb, ipvsh->len, sizeof(_ports), _ports);
- snet.in6 = iph->saddr;
- return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp,
- pp, offset, sizeof(struct ipv6hdr));
+ if (pptr) /* ports are only logged; skip the print when out of reach */
+ IP_VS_DBG(1, "Outg ICMPv6 Error found %pI6[%d] -> %pI6[%d]\n",
+ &ip6->saddr, ntohs(pptr[0]), &ip6->daddr, ntohs(pptr[1]));
+ }
+ snet.in6 = ipvsh->saddr.in6;
+ return handle_response_icmp(AF_INET6, skb, &snet, ipvsh->protocol, cp,
+ pp, ipvsh->len, sizeof(struct ipv6hdr));
}
#endif
@@ -1064,6 +1074,11 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
ip_vs_out_stats(cp, skb);
ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
skb->ipvs_property = 1;
+ IP_VS_DBG(1, " IP(%p) response set track(%x) %pI6c#%d %pI6c#%d %pI6c#%d fw:%d len:%d\n",
+ skb, cp->flags & IP_VS_CONN_F_NFCT,
+ &cp->caddr.in6, ntohs(cp->cport),
+ &cp->vaddr.in6, ntohs(cp->vport),
+ &cp->daddr.in6, ntohs(cp->dport), cp->fwmark, skb->len);
if (!(cp->flags & IP_VS_CONN_F_NFCT))
ip_vs_notrack(skb);
else
@@ -1083,14 +1098,14 @@ drop:
/*
* Check if outgoing packet belongs to the established ip_vs_conn.
*/
-static unsigned int
-ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
+static unsigned int ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
{
struct net *net = NULL;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
+ unsigned short fragoffs = 0;
EnterFunction(11);
@@ -1115,17 +1130,32 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
if (!net_ipvs(net)->enable)
return NF_ACCEPT;
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+ /* It's a fragment take first frag */
+ if (skb_nfct_reasm(skb))
+ ip_vs_fill_iph_skb(af, skb_nfct_reasm(skb), &iph);
+ else
+ ip_vs_fill_iph_skb(af, skb, &iph);
+
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
+ if (skb_nfct_reasm(skb)) {
+ struct sk_buff *reasm = skb_nfct_reasm(skb);
+ int len = 0;
+
+ ipv6_find_hdr(skb, &len, -1, &fragoffs, NULL);
+
+ if (!fragoffs) {
+ reasm->mark = skb->mark;
+ skb_dst_copy(reasm, skb);
+ }
+ }
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
int related;
- int verdict = ip_vs_out_icmp_v6(skb, &related,
- hooknum);
-
+ int verdict = ip_vs_out_icmp_v6(skb, &related, hooknum,
+ &iph, fragoffs);
+ /* Related means an icmp error on an ipvs_conn */
if (related)
return verdict;
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
} else
#endif
@@ -1135,7 +1165,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
if (related)
return verdict;
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+ ip_vs_fill_iph_skb(af, skb, &iph);
}
pd = ip_vs_proto_data_get(net, iph.protocol);
@@ -1145,28 +1175,20 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
/* reassemble IP fragments */
#ifdef CONFIG_IP_VS_IPV6
- if (af == AF_INET6) {
- if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
- if (ip_vs_gather_frags_v6(skb,
- ip_vs_defrag_user(hooknum)))
- return NF_STOLEN;
- }
-
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
- } else
+ if (af == AF_INET)
#endif
if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) {
if (ip_vs_gather_frags(skb,
ip_vs_defrag_user(hooknum)))
return NF_STOLEN;
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+ ip_vs_fill_iph_skb(af, skb, &iph);
}
/*
* Check if the packet belongs to an existing entry
*/
- cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);
+ cp = pp->conn_out_get(af, skb, &iph, iph.len, 0, fragoffs);
if (likely(cp))
return handle_response(af, skb, pd, cp, iph.len);
@@ -1176,8 +1198,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
pp->protocol == IPPROTO_SCTP)) {
__be16 _ports[2], *pptr;
- pptr = skb_header_pointer(skb, iph.len,
- sizeof(_ports), _ports);
+ /* Handle fragments */
+ if (fragoffs)
+ pptr = skb_header_pointer(skb_nfct_reasm(skb), iph.len,
+ sizeof(_ports), _ports);
+ else
+ pptr = skb_header_pointer(skb, iph.len,
+ sizeof(_ports), _ports);
if (pptr == NULL)
return NF_ACCEPT; /* Not for me */
if (ip_vs_lookup_real_service(net, af, iph.protocol,
@@ -1389,14 +1416,11 @@ out:
}
#ifdef CONFIG_IP_VS_IPV6
-static int
-ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
+static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
+ unsigned int hooknum, struct ip_vs_iphdr *iph)
{
struct net *net = NULL;
- struct ipv6hdr *iph;
struct icmp6hdr _icmph, *ic;
- struct ipv6hdr _ciph, *cih; /* The ip header contained
- within the ICMP */
struct ip_vs_iphdr ciph;
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
@@ -1405,15 +1429,7 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
*related = 1;
- /* reassemble IP fragments */
- if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
- if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
- return NF_STOLEN;
- }
-
- iph = ipv6_hdr(skb);
- offset = sizeof(struct ipv6hdr);
- ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+ ic = skb_header_pointer(skb, iph->len, sizeof(_icmph), &_icmph);
if (ic == NULL)
return NF_DROP;
@@ -1435,39 +1451,60 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
return NF_ACCEPT;
}
- /* Now find the contained IP header */
- offset += sizeof(_icmph);
- cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
- if (cih == NULL)
- return NF_ACCEPT; /* The packet looks wrong, ignore */
-
+ /*
+ * Now find the contained IP header
+ * an icmp error message will never be fragmented, but
+ * the contained IP header might contain a fragment...
+ */
+ ciph.len = iph->len + sizeof(_icmph);
+ ciph.flags = 0;
+ ciph.fragoffs = 0;
+ offset = ciph.len; /* embedded IPv6 header; IP_VS_DBG_PKT below wants this */
+ {
+ struct ipv6hdr _ip6h, *ip6h;
+
+ ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
+ if (ip6h == NULL)
+ return NF_ACCEPT; /* The packet looks wrong, ignore */
+ /* Embedded packet's addresses; conn_in_get() handles reverse order */
+ ciph.saddr.in6 = ip6h->saddr;
+ ciph.daddr.in6 = ip6h->daddr;
+ }
+ /* Walk the embedded packet's headers up to its transport header */
+ ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs,
+ &ciph.flags);
+ if (ciph.protocol < 0)
+ return NF_ACCEPT; /* Embedded header looks wrong, ignore */
+
+ /* Hmm todo: check if find_hdr should be used instead */
net = skb_net(skb);
- pd = ip_vs_proto_data_get(net, cih->nexthdr);
+ pd = ip_vs_proto_data_get(net, ciph.protocol);
if (!pd)
return NF_ACCEPT;
pp = pd->pp;
- /* Is the embedded protocol header present? */
- /* TODO: we don't support fragmentation at the moment anyways */
- if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
+ /* Is the embedded protocol header present?
+ * If it's the second or later fragment we don't know what it is
+ * i.e. just let it through.
+ */
+ if (ciph.fragoffs)
return NF_ACCEPT;
IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
"Checking incoming ICMPv6 for");
+ offset = ciph.len; /* embedded transport header from here on */
- offset += sizeof(struct ipv6hdr);
-
- ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1);
+ cp = pp->conn_in_get(AF_INET6, skb, &ciph, ciph.len, 1);
if (!cp)
return NF_ACCEPT;
/* do the statistics and put it back */
ip_vs_in_stats(cp, skb);
- if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr ||
- IPPROTO_SCTP == cih->nexthdr)
- offset += 2 * sizeof(__u16);
+ if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol ||
+ IPPROTO_SCTP == ciph.protocol)
+ offset = ciph.len + (2 * sizeof(__u16));
+
verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum);
__ip_vs_conn_put(cp);
@@ -1489,13 +1514,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
+ unsigned short fragoffs = 0;
int ret, pkts;
struct netns_ipvs *ipvs;
+ struct sk_buff *reasm;
+
/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
return NF_ACCEPT;
+ /* ipvs enabled in this netns ? */
+ net = skb_net(skb);
+ if (!net_ipvs(net)->enable)
+ return NF_ACCEPT;
+
/*
* Big tappo:
* - remote client: only PACKET_HOST
@@ -1503,20 +1536,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
*/
if (unlikely((skb->pkt_type != PACKET_HOST &&
hooknum != NF_INET_LOCAL_OUT) ||
- !skb_dst(skb))) {
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
- IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s"
- " ignored in hook %u\n",
- skb->pkt_type, iph.protocol,
- IP_VS_DBG_ADDR(af, &iph.daddr), hooknum);
+ !skb_dst(skb))) {
+ LeaveFunction(12);
return NF_ACCEPT;
}
- /* ipvs enabled in this netns ? */
- net = skb_net(skb);
- if (!net_ipvs(net)->enable)
- return NF_ACCEPT;
-
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
/* Bad... Do not break raw sockets */
if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
@@ -1528,15 +1551,36 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
return NF_ACCEPT;
}
+ reasm = skb_nfct_reasm(skb);
+ ip_vs_fill_iph_skb(af, (reasm ? reasm : skb), &iph);
+
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
+ if (reasm) {
+ int len = 0;
+
+ ipv6_find_hdr(skb, &len, -1, &fragoffs, NULL);
+ IP_VS_DBG(1, "IN FRAG %s(%p s:%p) %pI6c %pI6c hdr:%d len:%d/%d %d fw:%u/%u\n",
+ (fragoffs) ? "+2:nd" : "1:st", reasm, skb,
+ &ipv6_hdr(reasm)->saddr, &ipv6_hdr(reasm)->daddr,
+ ipv6_hdr(reasm)->nexthdr, skb->len, reasm->len, fragoffs,
+ skb->mark, reasm->mark);
+ }
+ /*
+ * Save first fragment dst & fwmark to the re-assembly skb
+ */
+ if (!fragoffs && reasm) {
+ reasm->mark = skb->mark;
+ skb_dst_copy(reasm, skb);
+ }
+
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
int related;
- int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum);
+ int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum,
+ &iph);
if (related)
return verdict;
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
} else
#endif
@@ -1546,7 +1590,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
if (related)
return verdict;
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+ /* I don't think this one is needed ... /HS */
+ ip_vs_fill_iph_skb(af, skb, &iph);
}
/* Protocol supported? */
@@ -1556,14 +1601,19 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
pp = pd->pp;
/*
* Check if the packet belongs to an existing connection entry
+ * Only sched first IPv6 fragment.
*/
- cp = pp->conn_in_get(af, skb, &iph, iph.len, 0);
+ if (fragoffs)
+ cp = pp->conn_in_get(af, reasm, &iph, iph.len, 0);
+ else {
+ cp = pp->conn_in_get(af, skb, &iph, iph.len, 0);
- if (unlikely(!cp)) {
- int v;
+ if (unlikely(!cp)) {
+ int v;
- if (!pp->conn_schedule(af, skb, pd, &v, &cp))
- return v;
+ if (!pp->conn_schedule(af, skb, pd, &v, &cp))
+ return v;
+ }
}
if (unlikely(!cp)) {
@@ -1678,6 +1728,38 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
}
#ifdef CONFIG_IP_VS_IPV6
+/*
+ * AF_INET6 fragment handling
+ * Second and later fragments inherit the mark and dst that were kept
+ * on the reassembly skb.
+ */
+static unsigned int
+ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct sk_buff *reasm_skb = skb_nfct_reasm(skb);
+ struct ip_vs_iphdr hdr = { .len = 0, .flags = 0, };
+
+ /* Without a reassembly skb this is no "replay" from nf_ct_frag6_output */
+ if (reasm_skb == NULL)
+ return NF_ACCEPT;
+
+ /* Hands off while ipvs is disabled in this netns */
+ if (!net_ipvs(skb_net(skb))->enable)
+ return NF_ACCEPT;
+
+ hdr.protocol = ipv6_find_hdr(skb, &hdr.len, -1, &hdr.fragoffs,
+ &hdr.flags);
+ if (hdr.fragoffs) {
+ /* Not the first fragment: reuse what ip_vs_in / out stored */
+ skb->mark = reasm_skb->mark;
+ skb_dst_copy(skb, reasm_skb);
+ }
+
+ return NF_ACCEPT;
+}
/*
* AF_INET6 handler in NF_INET_LOCAL_IN chain
@@ -1749,8 +1831,10 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
{
int r;
struct net *net;
+ struct ip_vs_iphdr iphdr;
- if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6)
+ ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr);
+ if (iphdr.protocol != IPPROTO_ICMPV6)
return NF_ACCEPT;
/* ipvs enabled in this netns ? */
@@ -1758,7 +1842,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
if (!net_ipvs(net)->enable)
return NF_ACCEPT;
- return ip_vs_in_icmp_v6(skb, &r, hooknum);
+ return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr);
}
#endif
@@ -1816,6 +1900,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
.priority = 100,
},
#ifdef CONFIG_IP_VS_IPV6
+ /* After mangle & nat fetch 2:nd fragment and following */
+ {
+ .hook = ip_vs_preroute_frag6,
+ .owner = THIS_MODULE,
+ .pf = PF_INET6,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP6_PRI_NAT_DST + 1,
+ },
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply6,
@@ -87,7 +87,7 @@ static struct ip_vs_conn *
ah_esp_conn_out_get(int af, const struct sk_buff *skb,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
- int inverse)
+ int inverse, unsigned short foffs)
{
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
@@ -40,7 +40,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
struct tcphdr _tcph, *th;
struct ip_vs_iphdr iph;
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+ ip_vs_fill_iph_skb(af, skb, &iph);
th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph);
if (th == NULL) {
@@ -37,7 +37,7 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
struct udphdr _udph, *uh;
struct ip_vs_iphdr iph;
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+ ip_vs_fill_iph_skb(af, skb, &iph);
uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
if (uh == NULL) {
@@ -1080,6 +1080,11 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
}
+ IP_VS_DBG(1, "DR XMIT(%p): %pI6c %pI6c via %pI6c dp:%d fw:%d hdr:%d proto:%d len:%d\n",
+ skb, &cp->caddr, &cp->vaddr, &cp->daddr,
+ ntohs(cp->dport), cp->fwmark,
+ ipv6_hdr(skb)->nexthdr, cp->protocol, skb->len);
+
/* MTU checking */
mtu = dst_mtu(&rt->dst);
if (skb->len > mtu) {
@@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
/*
* Check if the packet belongs to an existing entry
*/
- cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */);
+ cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */, 0);
if (unlikely(cp == NULL)) {
match = false;
goto out;