@@ -18,6 +18,7 @@
#include <stdlib.h>
#include <stdio.h>
+#include "bitmap.h"
#include "command-line.h"
#include "daemon.h"
#include "dirs.h"
@@ -62,6 +63,8 @@ static const char *ovnsb_db;
/* MAC address management (macam) table of "struct eth_addr"s, that holds the
* MAC addresses allocated by the OVN ipam module. */
static struct hmap macam = HMAP_INITIALIZER(&macam);
+
+#define MAX_OVN_TAGS 4096
/* Pipeline stages. */
@@ -885,8 +888,7 @@ ipam_allocate_addresses(struct ovn_datapath *od, struct ovn_port *op,
}
static void
-build_ipam(struct northd_context *ctx, struct hmap *datapaths,
- struct hmap *ports)
+build_ipam(struct hmap *datapaths, struct hmap *ports)
{
/* IPAM generally stands for IP address management. In non-virtualized
* world, MAC addresses come with the hardware. But, with virtualized
@@ -894,10 +896,6 @@ build_ipam(struct northd_context *ctx, struct hmap *datapaths,
* does both IP address management (ipam) and MAC address management
* (macam). */
- if (!ctx->ovnnb_txn) {
- return;
- }
-
/* If the switch's other_config:subnet is set, allocate new addresses for
* ports that have the "dynamic" keyword in their addresses column. */
struct ovn_datapath *od;
@@ -955,12 +953,111 @@ build_ipam(struct northd_context *ctx, struct hmap *datapaths,
}
}
+/* Tag allocation for nested containers.
+ *
+ * One node exists for each distinct 'parent_name'.  For the logical switch
+ * ports that share that parent, the node keeps track of every tag that is
+ * already in use, so that a newly allocated tag can be picked from the
+ * unused ones. */
+struct tag_alloc_node {
+ /* Links this node into the tag allocation table, which is hashed on
+  * 'parent_name'. */
+ struct hmap_node hmap_node;
+ /* Copy of the parent port's name, owned (and eventually freed) by the
+  * node. */
+ char *parent_name;
+ unsigned long *allocated_tags; /* A bitmap to track allocated tags. */
+};
+
+/* Frees every node in 'tag_alloc_table', together with the parent name and
+ * the tag bitmap that each node owns, and then destroys the table itself. */
+static void
+tag_alloc_destroy(struct hmap *tag_alloc_table)
+{
+    struct tag_alloc_node *entry;
+
+    /* Each iteration pops one node off the table, so it is safe to free it. */
+    HMAP_FOR_EACH_POP (entry, hmap_node, tag_alloc_table) {
+        free(entry->parent_name);
+        bitmap_free(entry->allocated_tags);
+        free(entry);
+    }
+    hmap_destroy(tag_alloc_table);
+}
+
+/* Returns the node that tracks tags for 'parent_name' in 'tag_alloc_table'.
+ *
+ * If the table has no node for 'parent_name' yet, a new one is created with
+ * tag 0 already marked as in use, inserted into the table, and returned. */
+static struct tag_alloc_node *
+tag_alloc_get_node(struct hmap *tag_alloc_table, const char *parent_name)
+{
+    size_t hash = hash_string(parent_name, 0);
+    struct tag_alloc_node *node;
+
+    /* Reuse the node for this parent if one is already in the table. */
+    HMAP_FOR_EACH_WITH_HASH (node, hmap_node, hash, tag_alloc_table) {
+        if (strcmp(node->parent_name, parent_name) == 0) {
+            return node;
+        }
+    }
+
+    /* No match: build a fresh node that owns its own copy of the name. */
+    node = xmalloc(sizeof *node);
+    node->parent_name = xstrdup(parent_name);
+    node->allocated_tags = bitmap_allocate(MAX_OVN_TAGS);
+    /* Tag 0 is invalid for nested containers. */
+    bitmap_set1(node->allocated_tags, 0);
+    hmap_insert(tag_alloc_table, &node->hmap_node, hash);
+
+    return node;
+}
+
+/* Marks the tag of the already existing nested container port 'nbsp' as in
+ * use for its parent in 'tag_alloc_table'.
+ *
+ * Ports that have no (or an empty) 'parent_name', or that have no 'tag' set,
+ * are left out: there is nothing to record for them. */
+static void
+tag_alloc_add_existing_tags(struct hmap *tag_alloc_table,
+                            const struct nbrec_logical_switch_port *nbsp)
+{
+    if (nbsp->parent_name && nbsp->parent_name[0] && nbsp->tag) {
+        struct tag_alloc_node *node
+            = tag_alloc_get_node(tag_alloc_table, nbsp->parent_name);
+
+        bitmap_set1(node->allocated_tags, *nbsp->tag);
+    }
+}
+
+/* Fills in the 'tag' column of 'nbsp' from its 'tag_request' column.
+ *
+ *   - No 'tag_request': nothing is done.
+ *
+ *   - Non-empty 'parent_name' and a 'tag_request' of 0: the port is a nested
+ *     container that asks for allocation.  Unless 'tag' is already set, the
+ *     first tag not yet in use for that parent in 'tag_alloc_table' is marked
+ *     as used and written to 'tag'.  If no tag is left, an error is logged
+ *     and 'tag' stays unset.
+ *
+ *   - Nonzero 'tag_request': it is copied to 'tag' as is; 'tag_alloc_table'
+ *     is neither consulted nor updated.
+ *
+ *   - 'tag_request' of 0 without a parent: nothing is written. */
+static void
+tag_alloc_create_new_tag(struct hmap *tag_alloc_table,
+                         const struct nbrec_logical_switch_port *nbsp)
+{
+    if (!nbsp->tag_request) {
+        return;
+    }
+
+    if (nbsp->parent_name && nbsp->parent_name[0]
+        && *nbsp->tag_request == 0) {
+        /* For nested containers that need allocation, do the allocation. */
+
+        if (nbsp->tag) {
+            /* This has already been allocated. */
+            return;
+        }
+
+        struct tag_alloc_node *tag_alloc_node;
+        int64_t tag;
+        tag_alloc_node = tag_alloc_get_node(tag_alloc_table,
+                                            nbsp->parent_name);
+        /* The bitmap holds exactly MAX_OVN_TAGS bits, numbered 0 through
+         * MAX_OVN_TAGS - 1, so the scan has to stop at MAX_OVN_TAGS.  Scanning
+         * any further would read past the end of the bitmap and could yield
+         * MAX_OVN_TAGS itself, which is not a valid tag.  The end value is
+         * what comes back when no free tag is found. */
+        tag = bitmap_scan(tag_alloc_node->allocated_tags, 0, 1,
+                          MAX_OVN_TAGS);
+        if (tag == MAX_OVN_TAGS) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+            VLOG_ERR_RL(&rl, "out of vlans for logical switch ports with "
+                        "parent %s", nbsp->parent_name);
+            return;
+        }
+        bitmap_set1(tag_alloc_node->allocated_tags, tag);
+        nbrec_logical_switch_port_set_tag(nbsp, &tag, 1);
+    } else if (*nbsp->tag_request != 0) {
+        /* For everything else, copy the contents of 'tag_request' to 'tag'. */
+        nbrec_logical_switch_port_set_tag(nbsp, nbsp->tag_request, 1);
+    }
+}
+
static void
join_logical_ports(struct northd_context *ctx,
struct hmap *datapaths, struct hmap *ports,
- struct ovs_list *sb_only, struct ovs_list *nb_only,
- struct ovs_list *both)
+ struct hmap *tag_alloc_table, struct ovs_list *sb_only,
+ struct ovs_list *nb_only, struct ovs_list *both)
{
hmap_init(ports);
ovs_list_init(sb_only);
@@ -1053,6 +1150,7 @@ join_logical_ports(struct northd_context *ctx,
op->od = od;
ipam_add_port_addresses(od, op);
+ tag_alloc_add_existing_tags(tag_alloc_table, nbsp);
}
} else {
for (size_t i = 0; i < od->nbr->n_ports; i++) {
@@ -1243,13 +1341,21 @@ build_ports(struct northd_context *ctx, struct hmap *datapaths,
struct hmap *ports)
{
struct ovs_list sb_only, nb_only, both;
+ struct hmap tag_alloc_table;
+ hmap_init(&tag_alloc_table);
- join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both);
+ join_logical_ports(ctx, datapaths, ports, &tag_alloc_table, &sb_only,
+ &nb_only, &both);
- /* For logical ports that are in both databases, update the southbound
- * record based on northbound data. Also index the in-use tunnel_keys. */
struct ovn_port *op, *next;
+ /* For logical ports that are in both databases, update the southbound
+ * record based on northbound data. Also index the in-use tunnel_keys.
+ * For logical ports that are in NB database, do any tag allocation
+ * needed. */
LIST_FOR_EACH_SAFE (op, next, list, &both) {
+ if (op->nbsp) {
+ tag_alloc_create_new_tag(&tag_alloc_table, op->nbsp);
+ }
ovn_port_update_sbrec(op);
add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
@@ -1286,6 +1392,8 @@ build_ports(struct northd_context *ctx, struct hmap *datapaths,
if (remove_mac_bindings) {
cleanup_mac_bindings(ctx, ports);
}
+
+ tag_alloc_destroy(&tag_alloc_table);
}
#define OVN_MIN_MULTICAST 32768
@@ -3979,13 +4087,13 @@ sync_address_sets(struct northd_context *ctx)
static void
ovnnb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
{
- if (!ctx->ovnsb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnnb_idl)) {
+ if (!ctx->ovnsb_txn || !ctx->ovnnb_txn) {
return;
}
struct hmap datapaths, ports;
build_datapaths(ctx, &datapaths);
build_ports(ctx, &datapaths, &ports);
- build_ipam(ctx, &datapaths, &ports);
+ build_ipam(&datapaths, &ports);
build_lflows(ctx, &datapaths, &ports);
sync_address_sets(ctx);
@@ -1,7 +1,7 @@
{
"name": "OVN_Northbound",
- "version": "5.3.1",
- "cksum": "1921908091 9353",
+ "version": "5.3.2",
+ "cksum": "45731008 9609",
"tables": {
"NB_Global": {
"columns": {
@@ -48,6 +48,11 @@
"min": 0,
"max": "unlimited"}},
"parent_name": {"type": {"key": "string", "min": 0, "max": 1}},
+ "tag_request": {
+ "type": {"key": {"type": "integer",
+ "minInteger": 0,
+ "maxInteger": 4095},
+ "min": 0, "max": 1}},
"tag": {
"type": {"key": {"type": "integer",
"minInteger": 1,
@@ -327,10 +327,19 @@
other <ref table="Logical_Switch_Port"/>.
</column>
- <column name="tag">
+ <column name="tag_request">
<p>
The VLAN tag in the network traffic associated with a container's
- network interface.
+ network interface. The client can request <code>ovn-northd</code>
+ to allocate a unique tag for the logical switch port with a specific
+ parent (specified in <ref column="parent_name"/>) by setting a value
+ of <code>0</code> in this column. The allocated value is written
+ by <code>ovn-northd</code> in the <ref column="tag"/> column
+ (Note that these tags are allocated and managed locally in
+ <code>ovn-northd</code>, so they cannot be reconstructed in the event
+ that the database is lost). The client can also request a specific
+ non-zero tag and <code>ovn-northd</code> will honor it and copy that
+ value to the <ref column="tag"/> column.
</p>
<p>
@@ -340,6 +349,13 @@
match incoming traffic and is also added to outgoing traffic.
</p>
</column>
+
+ <column name="tag">
+ <p>
+ The VLAN tag allocated by <code>ovn-northd</code> based on the
+ contents of the <ref column="tag_request"/> column.
+ </p>
+ </column>
</group>
<group title="Port State">
@@ -120,12 +120,14 @@
</p>
</dd>
- <dt>[<code>--may-exist</code>] <code>lsp-add</code> <var>switch</var> <var>port</var> <var>parent</var> <var>tag</var></dt>
+ <dt>[<code>--may-exist</code>] <code>lsp-add</code> <var>switch</var> <var>port</var> <var>parent</var> <var>tag_request</var></dt>
<dd>
<p>
Creates on <var>switch</var> a logical switch port named
<var>port</var> that is a child of <var>parent</var> that is
- identifed with VLAN ID <var>tag</var>. This is useful in
+ identified with VLAN ID <var>tag_request</var>. For a given
+ <var>parent</var>, if <var>tag_request</var> is <code>0</code>,
+ <code>ovn-northd</code> generates a unique tag. This is useful in
cases such as virtualized container environments where Open
vSwitch does not have a direct connection to the container's
port and it must be shared with the virtual machine's port.
@@ -136,7 +138,7 @@
exists, unless <code>--may-exist</code> is specified. Regardless of
<code>--may-exist</code>, it is an error if the existing port is not
in <var>switch</var> or if it does not have the specified
- <var>parent</var> and <var>tag</var>.
+ <var>parent</var> and <var>tag_request</var>.
</p>
</dd>
@@ -776,12 +776,13 @@ nbctl_lsp_add(struct ctl_context *ctx)
lsp_name, lsp->parent_name);
}
- if (!lsp->n_tag) {
- ctl_fatal("%s: port already exists but has no tag",
+ if (!lsp->n_tag_request) {
+ ctl_fatal("%s: port already exists but has no tag_request",
lsp_name);
- } else if (lsp->tag[0] != tag) {
+ } else if (lsp->tag_request[0] != tag) {
ctl_fatal("%s: port already exists with different "
- "tag %"PRId64, lsp_name, lsp->tag[0]);
+ "tag_request %"PRId64, lsp_name,
+ lsp->tag_request[0]);
}
} else {
if (lsp->parent_name) {
@@ -798,7 +799,7 @@ nbctl_lsp_add(struct ctl_context *ctx)
nbrec_logical_switch_port_set_name(lsp, lsp_name);
if (tag >= 0) {
nbrec_logical_switch_port_set_parent_name(lsp, parent_name);
- nbrec_logical_switch_port_set_tag(lsp, &tag, 1);
+ nbrec_logical_switch_port_set_tag_request(lsp, &tag, 1);
}
/* Insert the logical port into the logical switch. */
@@ -132,11 +132,11 @@ AT_CHECK([ovn-nbctl --may-exist lsp-add ls0 lp2 lp4 5], [1], [],
[ovn-nbctl: lp2: port already exists with different parent lp3
])
AT_CHECK([ovn-nbctl --may-exist lsp-add ls0 lp2 lp3 10], [1], [],
- [ovn-nbctl: lp2: port already exists with different tag 5
+ [ovn-nbctl: lp2: port already exists with different tag_request 5
])
-AT_CHECK([ovn-nbctl clear Logical_Switch_Port lp2 tag])
+AT_CHECK([ovn-nbctl clear Logical_Switch_Port lp2 tag_request])
AT_CHECK([ovn-nbctl --may-exist lsp-add ls0 lp2 lp3 5], [1], [],
- [ovn-nbctl: lp2: port already exists but has no tag
+ [ovn-nbctl: lp2: port already exists but has no tag_request
])
OVN_NBCTL_TEST_STOP
@@ -5053,3 +5053,119 @@ OVS_WAIT_UNTIL([test `ovs-ofctl dump-flows br-int table=0 | grep REG13 | wc -l`
OVN_CLEANUP([hv1])
AT_CLEANUP
+
+AT_SETUP([ovn -- tag allocation])
+AT_KEYWORDS([ovntags])
+ovn_start
+
+AT_CHECK([ovn-nbctl ls-add ls0])
+AT_CHECK([ovn-nbctl lsp-add ls0 parent1])
+AT_CHECK([ovn-nbctl lsp-add ls0 parent2])
+AT_CHECK([ovn-nbctl ls-add ls1])
+
+dnl When a tag is provided, no allocation is done
+AT_CHECK([ovn-nbctl --wait=sb lsp-add ls1 c0 parent1 3])
+AT_CHECK([ovn-nbctl lsp-get-tag c0], [0], [3
+])
+dnl The same 'tag' gets created in southbound database.
+AT_CHECK([ovn-sbctl --data=bare --no-heading --columns=tag find port_binding \
+logical_port="c0"], [0], [3
+])
+
+dnl Allocate tags and see it getting created in both NB and SB
+AT_CHECK([ovn-nbctl --wait=sb lsp-add ls1 c1 parent1 0])
+AT_CHECK([ovn-nbctl lsp-get-tag c1], [0], [1
+])
+AT_CHECK([ovn-sbctl --data=bare --no-heading --columns=tag find port_binding \
+logical_port="c1"], [0], [1
+])
+
+AT_CHECK([ovn-nbctl --wait=sb lsp-add ls1 c2 parent1 0])
+AT_CHECK([ovn-nbctl lsp-get-tag c2], [0], [2
+])
+AT_CHECK([ovn-sbctl --data=bare --no-heading --columns=tag find port_binding \
+logical_port="c2"], [0], [2
+])
+AT_CHECK([ovn-nbctl --wait=sb lsp-add ls1 c3 parent1 0])
+AT_CHECK([ovn-nbctl lsp-get-tag c3], [0], [4
+])
+AT_CHECK([ovn-sbctl --data=bare --no-heading --columns=tag find port_binding \
+logical_port="c3"], [0], [4
+])
+
+dnl A different parent.
+AT_CHECK([ovn-nbctl --wait=sb lsp-add ls1 c4 parent2 0])
+AT_CHECK([ovn-nbctl lsp-get-tag c4], [0], [1
+])
+AT_CHECK([ovn-sbctl --data=bare --no-heading --columns=tag find port_binding \
+logical_port="c4"], [0], [1
+])
+
+AT_CHECK([ovn-nbctl --wait=sb lsp-add ls1 c5 parent2 0])
+AT_CHECK([ovn-nbctl lsp-get-tag c5], [0], [2
+])
+AT_CHECK([ovn-sbctl --data=bare --no-heading --columns=tag find port_binding \
+logical_port="c5"], [0], [2
+])
+
+dnl Delete a logical port and create a new one.
+AT_CHECK([ovn-nbctl --wait=sb lsp-del c1])
+AT_CHECK([ovn-nbctl --wait=sb lsp-add ls1 c6 parent1 0])
+AT_CHECK([ovn-nbctl lsp-get-tag c6], [0], [1
+])
+AT_CHECK([ovn-sbctl --data=bare --no-heading --columns=tag find port_binding \
+logical_port="c6"], [0], [1
+])
+
+dnl Restart northd to see that the same allocation remains.
+as northd
+OVS_APP_EXIT_AND_WAIT([ovn-northd])
+start_daemon ovn-northd \
+ --ovnnb-db=unix:"$ovs_base"/ovn-nb/ovn-nb.sock \
+ --ovnsb-db=unix:"$ovs_base"/ovn-sb/ovn-sb.sock
+
+dnl Create a switch to make sure that ovn-northd has run through the main loop.
+AT_CHECK([ovn-nbctl --wait=sb ls-add ls-dummy])
+AT_CHECK([ovn-nbctl lsp-get-tag c0], [0], [3
+])
+AT_CHECK([ovn-nbctl lsp-get-tag c6], [0], [1
+])
+AT_CHECK([ovn-nbctl lsp-get-tag c2], [0], [2
+])
+AT_CHECK([ovn-nbctl lsp-get-tag c3], [0], [4
+])
+AT_CHECK([ovn-nbctl lsp-get-tag c4], [0], [1
+])
+AT_CHECK([ovn-nbctl lsp-get-tag c5], [0], [2
+])
+
+dnl Create a switch port with a tag that has already been allocated.
+dnl It should go through fine with a duplicate tag.
+AT_CHECK([ovn-nbctl --wait=sb lsp-add ls1 c7 parent2 2])
+AT_CHECK([ovn-nbctl lsp-get-tag c7], [0], [2
+])
+AT_CHECK([ovn-sbctl --data=bare --no-heading --columns=tag find port_binding \
+logical_port="c7"], [0], [2
+])
+AT_CHECK([ovn-nbctl lsp-get-tag c5], [0], [2
+])
+
+AT_CHECK([ovn-nbctl ls-add ls2])
+dnl When there is no parent_name provided (for say, 'localnet'), 'tag_request'
+dnl gets copied to 'tag'
+AT_CHECK([ovn-nbctl --wait=sb lsp-add ls2 local0 "" 25])
+AT_CHECK([ovn-nbctl lsp-get-tag local0], [0], [25
+])
+dnl The same 'tag' gets created in southbound database.
+AT_CHECK([ovn-sbctl --data=bare --no-heading --columns=tag find port_binding \
+logical_port="local0"], [0], [25
+])
+dnl If 'tag_request' is 0 for localnet, nothing gets written to 'tag'
+AT_CHECK([ovn-nbctl --wait=sb lsp-add ls2 local1 "" 0])
+AT_CHECK([ovn-nbctl lsp-get-tag local1])
+dnl change the tag_request.
+AT_CHECK([ovn-nbctl --wait=sb set logical_switch_port local1 tag_request=50])
+AT_CHECK([ovn-nbctl lsp-get-tag local1], [0], [50
+])
+
+AT_CLEANUP
When there are hundreds of nodes controlled by OVN, the workflow to track
and allocate unique tags across multiple hosts becomes complicated. It is
much easier to let ovn-northd do the allocation.

Signed-off-by: Gurucharan Shetty <guru@ovn.org>
---
 ovn/northd/ovn-northd.c       | 134 ++++++++++++++++++++++++++++++++++++++----
 ovn/ovn-nb.ovsschema          |   9 ++-
 ovn/ovn-nb.xml                |  20 ++++++-
 ovn/utilities/ovn-nbctl.8.xml |   8 ++-
 ovn/utilities/ovn-nbctl.c     |  11 ++--
 tests/ovn-nbctl.at            |   6 +-
 tests/ovn.at                  | 116 ++++++++++++++++++++++++++++++++++++
 7 files changed, 276 insertions(+), 28 deletions(-)