diff mbox series

[net-next,5/5] net: mscc: ocelot: support L2 multicast entries

Message ID 20201029022738.722794-6-vladimir.oltean@nxp.com
State Accepted
Delegated to: David Miller
Headers show
Series L2 multicast forwarding for Ocelot switch | expand

Checks

Context Check Description
jkicinski/cover_letter success Link
jkicinski/fixes_present success Link
jkicinski/patch_count success Link
jkicinski/tree_selection success Clearly marked for net-next
jkicinski/subject_prefix success Link
jkicinski/source_inline success Was 0 now: 0
jkicinski/verify_signedoff success Link
jkicinski/module_param success Was 0 now: 0
jkicinski/build_32bit fail Errors and warnings before: 4 this patch: 4
jkicinski/kdoc success Errors and warnings before: 0 this patch: 0
jkicinski/verify_fixes success Link
jkicinski/checkpatch fail Link
jkicinski/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
jkicinski/header_inline success Link
jkicinski/stable success Stable not CCed

Commit Message

Vladimir Oltean Oct. 29, 2020, 2:27 a.m. UTC
There is one main difference in mscc_ocelot between IP multicast and L2
multicast. With IP multicast, destination ports are encoded into the
upper bytes of the multicast MAC address. Example: to deliver the
address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to
program the address of 00:03:08:11:22:33 into hardware. Whereas for L2
multicast, the MAC table entry points to a Port Group ID (PGID), and
that PGID contains the port mask that the packet will be forwarded to.
As to why it is this way, no clue. My guess is that not all port
combinations can be supported simultaneously with the limited number of
PGIDs, and this was somehow an issue for IP multicast but not for L2
multicast. Anyway.

Prior to this change, the raw L2 multicast code was bogus, due to the
fact that there wasn't really any way to test it using the bridge code.
There were 2 issues:
- A multicast PGID was allocated for each MDB entry, but it wasn't in
  fact programmed to hardware. It was dummy.
- In fact we don't want to reserve a multicast PGID for every single MDB
  entry. That would be odd because we can only have ~60 PGIDs, but
  thousands of MDB entries. So instead, we want to reserve a multicast
  PGID for every single port combination for multicast traffic. And
  since we can have 2 (or more) MDB entries delivered to the same port
  group (and therefore PGID), we need to reference-count the PGIDs.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
---
 drivers/net/ethernet/mscc/ocelot.c | 109 ++++++++++++++++++++++-------
 drivers/net/ethernet/mscc/ocelot.h |  16 ++++-
 include/soc/mscc/ocelot.h          |   1 +
 3 files changed, 100 insertions(+), 26 deletions(-)

Comments

Florian Fainelli Oct. 29, 2020, 2:41 a.m. UTC | #1
On 10/28/2020 7:27 PM, Vladimir Oltean wrote:
> There is one main difference in mscc_ocelot between IP multicast and L2
> multicast. With IP multicast, destination ports are encoded into the
> upper bytes of the multicast MAC address. Example: to deliver the
> address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to
> program the address of 00:03:08:11:22:33 into hardware. Whereas for L2
> multicast, the MAC table entry points to a Port Group ID (PGID), and
> that PGID contains the port mask that the packet will be forwarded to.
> As to why it is this way, no clue. My guess is that not all port
> combinations can be supported simultaneously with the limited number of
> PGIDs, and this was somehow an issue for IP multicast but not for L2
> multicast. Anyway.
> 
> Prior to this change, the raw L2 multicast code was bogus, due to the
> fact that there wasn't really any way to test it using the bridge code.
> There were 2 issues:
> - A multicast PGID was allocated for each MDB entry, but it wasn't in
>   fact programmed to hardware. It was dummy.
> - In fact we don't want to reserve a multicast PGID for every single MDB
>   entry. That would be odd because we can only have ~60 PGIDs, but
>   thousands of MDB entries. So instead, we want to reserve a multicast
>   PGID for every single port combination for multicast traffic. And
>   since we can have 2 (or more) MDB entries delivered to the same port
>   group (and therefore PGID), we need to reference-count the PGIDs.
> 
> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>

I believe you have the same gfp_t comment applicable here as in patch #4.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 713ab6ec8c8d..323dbd30661a 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -961,10 +961,37 @@  static enum macaccess_entry_type ocelot_classify_mdb(const unsigned char *addr)
 	return ENTRYTYPE_LOCKED;
 }
 
-static int ocelot_mdb_get_pgid(struct ocelot *ocelot,
-			       const struct ocelot_multicast *mc)
+static struct ocelot_pgid *ocelot_pgid_alloc(struct ocelot *ocelot, int index,
+					     unsigned long ports)
 {
-	int pgid;
+	struct ocelot_pgid *pgid;
+
+	pgid = kzalloc(sizeof(*pgid), GFP_KERNEL);
+	if (!pgid)
+		return ERR_PTR(-ENOMEM);
+
+	pgid->ports = ports;
+	pgid->index = index;
+	refcount_set(&pgid->refcount, 1);
+	list_add_tail(&pgid->list, &ocelot->pgids);
+
+	return pgid;
+}
+
+static void ocelot_pgid_free(struct ocelot *ocelot, struct ocelot_pgid *pgid)
+{
+	if (!refcount_dec_and_test(&pgid->refcount))
+		return;
+
+	list_del(&pgid->list);
+	kfree(pgid);
+}
+
+static struct ocelot_pgid *ocelot_mdb_get_pgid(struct ocelot *ocelot,
+					       const struct ocelot_multicast *mc)
+{
+	struct ocelot_pgid *pgid;
+	int index;
 
 	/* According to VSC7514 datasheet 3.9.1.5 IPv4 Multicast Entries and
 	 * 3.9.1.6 IPv6 Multicast Entries, "Instead of a lookup in the
@@ -973,24 +1000,34 @@  static int ocelot_mdb_get_pgid(struct ocelot *ocelot,
 	 */
 	if (mc->entry_type == ENTRYTYPE_MACv4 ||
 	    mc->entry_type == ENTRYTYPE_MACv6)
-		return 0;
+		return ocelot_pgid_alloc(ocelot, 0, mc->ports);
+
+	list_for_each_entry(pgid, &ocelot->pgids, list) {
+		/* When searching for a nonreserved multicast PGID, ignore the
+		 * dummy PGID of zero that we have for MACv4/MACv6 entries
+		 */
+		if (pgid->index && pgid->ports == mc->ports) {
+			refcount_inc(&pgid->refcount);
+			return pgid;
+		}
+	}
 
-	for_each_nonreserved_multicast_dest_pgid(ocelot, pgid) {
-		struct ocelot_multicast *mc;
+	/* Search for a free index in the nonreserved multicast PGID area */
+	for_each_nonreserved_multicast_dest_pgid(ocelot, index) {
 		bool used = false;
 
-		list_for_each_entry(mc, &ocelot->multicast, list) {
-			if (mc->pgid == pgid) {
+		list_for_each_entry(pgid, &ocelot->pgids, list) {
+			if (pgid->index == index) {
 				used = true;
 				break;
 			}
 		}
 
 		if (!used)
-			return pgid;
+			return ocelot_pgid_alloc(ocelot, index, mc->ports);
 	}
 
-	return -1;
+	return ERR_PTR(-ENOSPC);
 }
 
 static void ocelot_encode_ports_to_mdb(unsigned char *addr,
@@ -1014,6 +1051,7 @@  int ocelot_port_mdb_add(struct ocelot *ocelot, int port,
 	struct ocelot_port *ocelot_port = ocelot->ports[port];
 	unsigned char addr[ETH_ALEN];
 	struct ocelot_multicast *mc;
+	struct ocelot_pgid *pgid;
 	u16 vid = mdb->vid;
 
 	if (port == ocelot->npi)
@@ -1025,8 +1063,6 @@  int ocelot_port_mdb_add(struct ocelot *ocelot, int port,
 	mc = ocelot_multicast_get(ocelot, mdb->addr, vid);
 	if (!mc) {
 		/* New entry */
-		int pgid;
-
 		mc = devm_kzalloc(ocelot->dev, sizeof(*mc), GFP_KERNEL);
 		if (!mc)
 			return -ENOMEM;
@@ -1035,27 +1071,36 @@  int ocelot_port_mdb_add(struct ocelot *ocelot, int port,
 		ether_addr_copy(mc->addr, mdb->addr);
 		mc->vid = vid;
 
-		pgid = ocelot_mdb_get_pgid(ocelot, mc);
-
-		if (pgid < 0) {
-			dev_err(ocelot->dev,
-				"No more PGIDs available for mdb %pM vid %d\n",
-				mdb->addr, vid);
-			return -ENOSPC;
-		}
-
-		mc->pgid = pgid;
-
 		list_add_tail(&mc->list, &ocelot->multicast);
 	} else {
+		/* Existing entry. Clean up the current port mask from
+		 * hardware now, because we'll be modifying it.
+		 */
+		ocelot_pgid_free(ocelot, mc->pgid);
 		ocelot_encode_ports_to_mdb(addr, mc);
 		ocelot_mact_forget(ocelot, addr, vid);
 	}
 
 	mc->ports |= BIT(port);
+
+	pgid = ocelot_mdb_get_pgid(ocelot, mc);
+	if (IS_ERR(pgid)) {
+		dev_err(ocelot->dev,
+			"Cannot allocate PGID for mdb %pM vid %d\n",
+			mc->addr, mc->vid);
+		devm_kfree(ocelot->dev, mc);
+		return PTR_ERR(pgid);
+	}
+	mc->pgid = pgid;
+
 	ocelot_encode_ports_to_mdb(addr, mc);
 
-	return ocelot_mact_learn(ocelot, mc->pgid, addr, vid,
+	if (mc->entry_type != ENTRYTYPE_MACv4 &&
+	    mc->entry_type != ENTRYTYPE_MACv6)
+		ocelot_write_rix(ocelot, pgid->ports, ANA_PGID_PGID,
+				 pgid->index);
+
+	return ocelot_mact_learn(ocelot, pgid->index, addr, vid,
 				 mc->entry_type);
 }
 EXPORT_SYMBOL(ocelot_port_mdb_add);
@@ -1066,6 +1111,7 @@  int ocelot_port_mdb_del(struct ocelot *ocelot, int port,
 	struct ocelot_port *ocelot_port = ocelot->ports[port];
 	unsigned char addr[ETH_ALEN];
 	struct ocelot_multicast *mc;
+	struct ocelot_pgid *pgid;
 	u16 vid = mdb->vid;
 
 	if (port == ocelot->npi)
@@ -1081,6 +1127,7 @@  int ocelot_port_mdb_del(struct ocelot *ocelot, int port,
 	ocelot_encode_ports_to_mdb(addr, mc);
 	ocelot_mact_forget(ocelot, addr, vid);
 
+	ocelot_pgid_free(ocelot, mc->pgid);
 	mc->ports &= ~BIT(port);
 	if (!mc->ports) {
 		list_del(&mc->list);
@@ -1088,9 +1135,20 @@  int ocelot_port_mdb_del(struct ocelot *ocelot, int port,
 		return 0;
 	}
 
+	/* We have a PGID with fewer ports now */
+	pgid = ocelot_mdb_get_pgid(ocelot, mc);
+	if (IS_ERR(pgid))
+		return PTR_ERR(pgid);
+	mc->pgid = pgid;
+
 	ocelot_encode_ports_to_mdb(addr, mc);
 
-	return ocelot_mact_learn(ocelot, mc->pgid, addr, vid,
+	if (mc->entry_type != ENTRYTYPE_MACv4 &&
+	    mc->entry_type != ENTRYTYPE_MACv6)
+		ocelot_write_rix(ocelot, pgid->ports, ANA_PGID_PGID,
+				 pgid->index);
+
+	return ocelot_mact_learn(ocelot, pgid->index, addr, vid,
 				 mc->entry_type);
 }
 EXPORT_SYMBOL(ocelot_port_mdb_del);
@@ -1449,6 +1507,7 @@  int ocelot_init(struct ocelot *ocelot)
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&ocelot->multicast);
+	INIT_LIST_HEAD(&ocelot->pgids);
 	ocelot_mact_init(ocelot);
 	ocelot_vlan_init(ocelot);
 	ocelot_vcap_init(ocelot);
diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h
index 7f8b34c49971..291d39d49c4e 100644
--- a/drivers/net/ethernet/mscc/ocelot.h
+++ b/drivers/net/ethernet/mscc/ocelot.h
@@ -79,13 +79,27 @@  enum macaccess_entry_type {
 	ENTRYTYPE_MACv6,
 };
 
+/* A (PGID) port mask structure, encoding the 2^ocelot->num_phys_ports
+ * possibilities of egress port masks for L2 multicast traffic.
+ * For a switch with 9 user ports, there are 512 possible port masks, but the
+ * hardware only has 46 individual PGIDs that it can forward multicast traffic
+ * to. So we need a structure that maps the limited PGID indices to the port
+ * destinations requested by the user for L2 multicast.
+ */
+struct ocelot_pgid {
+	unsigned long ports;
+	int index;
+	refcount_t refcount;
+	struct list_head list;
+};
+
 struct ocelot_multicast {
 	struct list_head list;
 	enum macaccess_entry_type entry_type;
 	unsigned char addr[ETH_ALEN];
 	u16 vid;
 	u16 ports;
-	int pgid;
+	struct ocelot_pgid *pgid;
 };
 
 int ocelot_port_fdb_do_dump(const unsigned char *addr, u16 vid,
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 1e9db9577441..cc126d1796be 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -632,6 +632,7 @@  struct ocelot {
 	u32				*lags;
 
 	struct list_head		multicast;
+	struct list_head		pgids;
 
 	struct list_head		dummy_rules;
 	struct ocelot_vcap_block	block[3];