diff mbox

[net-next-2.6,1/7] xfrm: introduce basic mark infrastructure

Message ID 1266160732-946-2-git-send-email-hadi@cyberus.ca
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

jamal Feb. 14, 2010, 3:18 p.m. UTC
From: Jamal Hadi Salim <hadi@cyberus.ca>

Add basic structuring and accessors for xfrm mark

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
---
 include/linux/xfrm.h |   12 +++++++++---
 include/net/xfrm.h   |   28 ++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 3 deletions(-)

Comments

Patrick McHardy Feb. 15, 2010, 3:42 p.m. UTC | #1
jamal wrote:
> +static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_kmark *m)
> +{
> +	if (attrs[XFRMA_MARK])
> +		memcpy(m, nla_data(attrs[XFRMA_MARK]), sizeof(m));
> +	else
> +		m->v = m->m = 0;
> +
> +	return m->v & m->m;
> +}
> +
> +static inline int xfrm_mark_put(struct sk_buff *skb, struct xfrm_kmark *m)
> +{
> +	if (m->m & m->v)
> +		NLA_PUT(skb, XFRMA_MARK, sizeof(struct xfrm_kmark), m);

This doesn't look right. A mark value of 0 with a mask of ~0 won't
be properly dumped. I think this should check for (m->m | m->v).

> +	return 0;
> +
> +nla_put_failure:
> +	return -1;
> +}
> +
>  #endif	/* _NET_XFRM_H */

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
jamal Feb. 15, 2010, 5 p.m. UTC | #2
On Mon, 2010-02-15 at 16:42 +0100, Patrick McHardy wrote:


> This doesn't look right. A mark value of 0 with a mask of ~0 won't
> be properly dumped. I think this should check for (m->m | m->v).
> 

Good point, thanks. I will make that change;

Rest of patches look reasonable?

cheers,
jamal

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Patrick McHardy Feb. 15, 2010, 5:06 p.m. UTC | #3
jamal wrote:
> On Mon, 2010-02-15 at 16:42 +0100, Patrick McHardy wrote:
> 
> 
>> This doesn't look right. A mark value of 0 with a mask of ~0 won't
>> be properly dumped. I think this should check for (m->m | m->v).
>>
> 
> Good point, thanks. I will make that change;
> 
> Rest of patches look reasonable?

I couldn't spot any further problems so far.

One related feature which would be nice to have is the ability
to use marks for xfrm tunnel routing. But I'm not sure we can
do this in a backwards compatible way.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
jamal Feb. 15, 2010, 5:14 p.m. UTC | #4
On Mon, 2010-02-15 at 18:06 +0100, Patrick McHardy wrote:

> One related feature which would be nice to have is the ability
> to use marks for xfrm tunnel routing. But I'm not sure we can
> do this in a backwards compatible way.

I take it policy routing by mark is insufficient.
If you have time, can you give me an example setup description of that
and why it would be hard to be backward-compat?
If there's anything i can do in these patches to help, I will be more
than happy to.

cheers,
jamal

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Patrick McHardy Feb. 15, 2010, 5:21 p.m. UTC | #5
jamal wrote:
> On Mon, 2010-02-15 at 18:06 +0100, Patrick McHardy wrote:
> 
>> One related feature which would be nice to have is the ability
>> to use marks for xfrm tunnel routing. But I'm not sure we can
>> do this in a backwards compatible way.
> 
> I take it policy routing by mark is insufficient.

The xfrm route lookup doesn't use the packet mark.

> If you have time, can you give me an example setup description of that
> and why it would be hard to be backward-compat?

A couple of years ago I used this in a multipath setup, which
was using CONNMARK to persistently bind connections (tunnels
in this case) to a route after the first selection.

The problem with backwards compatibility is that people using
marks for multipath routing are most likely not expecting the
mark to suddenly take effect for IPsec tunnel routing.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
jamal Feb. 15, 2010, 6:59 p.m. UTC | #6
On Mon, 2010-02-15 at 18:21 +0100, Patrick McHardy wrote:

> The xfrm route lookup doesn't use the packet mark.

I see.
Is there a historical reason why it hasnt been used this way?
Reminds me of the reverse path patch i sent a while back that
caused havoc.. (mark wasnt being used in the reverse path either)

> A couple of years ago I used this in a multipath setup, which
> was using CONNMARK to persistently bind connections (tunnels
> in this case) to a route after the first selection.

Sounds like a reasonable feature to me.

> The problem with backwards compatibility is that people using
> marks for multipath routing are most likely not expecting the
> mark to suddenly take effect for IPsec tunnel routing.

The main reason it works ok for ipsec/policy-routing is because
user space essentially pins down the kernel path. Could you
not solve it via some user space daemon? First packet/event
to user space, download policies and wait until it expires or
route/tunnel goes down to react..

One of the problems maybe the semantics of what a general purpose
tag like mark being left to either the programmer (as in connmark)
or the admin (tc) - so building a general purpose daemon would have
to enforce some semantic to work ok.

cheers,
jamal

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benny Amorsen Feb. 16, 2010, 10:43 a.m. UTC | #7
jamal <hadi@cyberus.ca> writes:

> I take it policy routing by mark is insufficient.

xfrm ignores policy routing. You can't route IPSEC in Linux. This is
actually a fairly annoying limitation. The workaround is to do like
Microsoft: Encapsulate everything in l2tp or gre.


/Benny

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
jamal Feb. 16, 2010, 11:57 a.m. UTC | #8
On Tue, 2010-02-16 at 11:43 +0100, Benny Amorsen wrote:

> xfrm ignores policy routing. You can't route IPSEC in Linux. This is
> actually a fairly annoying limitation. The workaround is to do like
> Microsoft: Encapsulate everything in l2tp or gre.

With these patches if you set policy routing mark, have the proper
setting in the skb or socket for the mark then the proper
route will be selected. If you have an SPD + SA added with the
same mark, those will be used right after the route is selected. 
So essentially you have the same mark across.
Does that solve or alleviate the problem?

cheers,
jamal


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benny Amorsen Feb. 16, 2010, 12:59 p.m. UTC | #9
jamal <hadi@cyberus.ca> writes:

> With these patches if you set policy routing mark, have the proper
> setting in the skb or socket for the mark then the proper
> route will be selected. If you have an SPD + SA added with the
> same mark, those will be used right after the route is selected. 
> So essentially you have the same mark across.
> Does that solve or alleviate the problem?

I don't actually use marks at all, I do policy routing based on source
address. Currently rules are based on source interface, but all IPSEC
traffic comes from the same interface, unlike some tunnel-based
solutions.

Right now packets going out through an IPSEC tunnel do not hit the
routing table at all -- they just get shunted into the tunnel. Anything
that gives me the chance to run the packets through normal routing
before the tunnel grabs them works for me.

From your description, I would add the IPSEC SPD + SA with a specific
mark. Then I would set the mark in the rule table if I want the packets
to go through the tunnel, or clear the mark to have them go through
normal routing. Not perfect, because I would have to replicate parts of
the routing table in the rule table, but it could be made to work.

Perfect would be if I could set mark in the routing table instead of the
rule table, but sometimes perfect is the enemy of good...


/Benny

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
jamal Feb. 16, 2010, 1:16 p.m. UTC | #10
On Tue, 2010-02-16 at 13:59 +0100, Benny Amorsen wrote:

> From your description, I would add the IPSEC SPD + SA with a specific
> mark. Then I would set the mark in the rule table if I want the packets
> to go through the tunnel, or clear the mark to have them go through
> normal routing.

yes.

> Not perfect, because I would have to replicate parts of
> the routing table in the rule table, but it could be made to work.

Agreed this is a problem and not a nice one (the counter arguement is
at least theres a way for some users now..

> Perfect would be if I could set mark in the routing table instead of the
> rule table, but sometimes perfect is the enemy of good...

This is actually an interesting idea and is not far-fetched (and would
certainly get rid of the replication problem). If i understood
correctly, you would have:
ip route add blah blah mark 0x10

and that the routing core will use the mark to (as it does for example
with ifindex) to pick the route? I like the idea for the simple fact it
will reduce immensely configuration in some cases..

cheers,
jamal



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benny Amorsen Feb. 16, 2010, 9:56 p.m. UTC | #11
jamal <hadi@cyberus.ca> writes:

> This is actually an interesting idea and is not far-fetched (and would
> certainly get rid of the replication problem). If i understood
> correctly, you would have:
> ip route add blah blah mark 0x10

Exactly.

> and that the routing core will use the mark to (as it does for example
> with ifindex) to pick the route? I like the idea for the simple fact it
> will reduce immensely configuration in some cases..

It would certainly be handy for me...


/Benny

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
jamal Feb. 17, 2010, 11:58 a.m. UTC | #12
On Tue, 2010-02-16 at 22:56 +0100, Benny Amorsen wrote:
> jamal <hadi@cyberus.ca> writes:

> > ip route add blah blah mark 0x10
> 
> Exactly.
> 
> > and that the routing core will use the mark to (as it does for example
> > with ifindex) to pick the route? I like the idea for the simple fact it
> > will reduce immensely configuration in some cases..
> 
> It would certainly be handy for me...
> 

I would certainly be interested in adding this feature for the reasons
described above. 

An additional interesting connection would be to tie this feature to
grouping of netdevices for the purpose of multipath routing. This would
be the same as what we do currently with bindtodevice but on a group
instead of a single netdevice. It would require to also have general
purpose netdev->mark to group multiple netdevices (for this case).
The dev->mark could also be handy for other things (which have not
been efficiently solved in the past); example, i could add mark 0x10 to
all ppp* devices and then do "ip link ls mark 0x10" and it would only
fetch ppp* (or for shit-and-giggles as some New Brunswickians like to
say, ip link mark 0x10 down)

Patrick, thoughts? see anything breaking from either feature?

cheers,
jamal

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 29e04be..887c533 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -267,8 +267,8 @@  enum xfrm_attr_type_t {
 	XFRMA_ALG_COMP,		/* struct xfrm_algo */
 	XFRMA_ENCAP,		/* struct xfrm_algo + struct xfrm_encap_tmpl */
 	XFRMA_TMPL,		/* 1 or more struct xfrm_user_tmpl */
-	XFRMA_SA,
-	XFRMA_POLICY,
+	XFRMA_SA,		/* struct xfrm_usersa_info  */
+	XFRMA_POLICY,		/*struct xfrm_userpolicy_info */
 	XFRMA_SEC_CTX,		/* struct xfrm_sec_ctx */
 	XFRMA_LTIME_VAL,
 	XFRMA_REPLAY_VAL,
@@ -276,17 +276,23 @@  enum xfrm_attr_type_t {
 	XFRMA_ETIMER_THRESH,
 	XFRMA_SRCADDR,		/* xfrm_address_t */
 	XFRMA_COADDR,		/* xfrm_address_t */
-	XFRMA_LASTUSED,
+	XFRMA_LASTUSED,		/* unsigned long  */
 	XFRMA_POLICY_TYPE,	/* struct xfrm_userpolicy_type */
 	XFRMA_MIGRATE,
 	XFRMA_ALG_AEAD,		/* struct xfrm_algo_aead */
 	XFRMA_KMADDRESS,        /* struct xfrm_user_kmaddress */
 	XFRMA_ALG_AUTH_TRUNC,	/* struct xfrm_algo_auth */
+	XFRMA_MARK,		/* u32 */
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
 };
 
+struct xfrm_umark {
+	__u32           v; /* value */
+	__u32           m; /* mask */
+};
+
 enum xfrm_sadattr_type_t {
 	XFRMA_SAD_UNSPEC,
 	XFRMA_SAD_CNT,
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 0beb413..904527f 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -123,6 +123,11 @@  struct xfrm_state_walk {
 	u32			seq;
 };
 
+struct xfrm_kmark {
+	u32           v; /* value */
+	u32           m; /* mask */
+};
+
 /* Full description of state of transformer. */
 struct xfrm_state {
 #ifdef CONFIG_NET_NS
@@ -140,6 +145,7 @@  struct xfrm_state {
 
 	struct xfrm_id		id;
 	struct xfrm_selector	sel;
+	struct xfrm_kmark	mark;
 
 	u32			genid;
 
@@ -456,6 +462,7 @@  struct xfrm_tmpl {
 
 #define XFRM_MAX_DEPTH		6
 
+
 struct xfrm_policy_walk_entry {
 	struct list_head	all;
 	u8			dead;
@@ -481,6 +488,7 @@  struct xfrm_policy {
 
 	u32			priority;
 	u32			index;
+	struct xfrm_kmark	mark;
 	struct xfrm_selector	selector;
 	struct xfrm_lifetime_cfg lft;
 	struct xfrm_lifetime_cur curlft;
@@ -1570,4 +1578,24 @@  static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb)
 }
 #endif
 
+static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_kmark *m)
+{
+	if (attrs[XFRMA_MARK])
+		memcpy(m, nla_data(attrs[XFRMA_MARK]), sizeof(m));
+	else
+		m->v = m->m = 0;
+
+	return m->v & m->m;
+}
+
+static inline int xfrm_mark_put(struct sk_buff *skb, struct xfrm_kmark *m)
+{
+	if (m->m & m->v)
+		NLA_PUT(skb, XFRMA_MARK, sizeof(struct xfrm_kmark), m);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
 #endif	/* _NET_XFRM_H */