br_netfilter: namespace bridge netfilter sysctls

Currently, the /proc/sys/net/bridge folder is only created in the initial
network namespace. This patch ensures that the /proc/sys/net/bridge folder
is available in each network namespace if the module is loaded and
disappears from all network namespaces when the module is unloaded.

In doing so the patch makes the sysctls:

bridge-nf-call-arptables
bridge-nf-call-ip6tables
bridge-nf-call-iptables
bridge-nf-filter-pppoe-tagged
bridge-nf-filter-vlan-tagged
bridge-nf-pass-vlan-input-dev

apply per network namespace. This unblocks some use-cases where users would
like to e.g. not do bridge filtering for bridges in a specific network
namespace while doing so for bridges located in another network namespace.

The netfilter rules are afaict already per network namespace so it should
be safe for users to specify whether bridge devices inside a network
namespace are supposed to go through iptables et al. or not. Also, this can
already be done per-bridge by setting an option for each individual bridge
via Netlink. It should also be possible to do this for all bridges in a
network namespace via sysctls.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
Reviewed-by: Tyler Hicks <tyhicks@canonical.com>
diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h
index 74af19c..e51f596 100644
--- a/include/net/netfilter/br_netfilter.h
+++ b/include/net/netfilter/br_netfilter.h
@@ -48,7 +48,8 @@
 	return port ? &port->br->fake_rtable : NULL;
 }
 
-struct net_device *setup_pre_routing(struct sk_buff *skb);
+struct net_device *setup_pre_routing(struct sk_buff *skb,
+				     const struct net *net);
 void br_netfilter_enable(void);
 
 #if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 656a084..8a33268 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -72,17 +72,17 @@
 		return 0;
 }
 
-#define IS_VLAN_IP(skb) \
+#define IS_VLAN_IP(skb, net) \
 	(vlan_proto(skb) == htons(ETH_P_IP) && \
-	 init_net.brnf.filter_vlan_tagged)
+	 net->brnf.filter_vlan_tagged)
 
-#define IS_VLAN_IPV6(skb) \
+#define IS_VLAN_IPV6(skb, net) \
 	(vlan_proto(skb) == htons(ETH_P_IPV6) && \
-	 init_net.brnf.filter_vlan_tagged)
+	 net->brnf.filter_vlan_tagged)
 
-#define IS_VLAN_ARP(skb) \
+#define IS_VLAN_ARP(skb, net) \
 	(vlan_proto(skb) == htons(ETH_P_ARP) &&	\
-	 init_net.brnf.filter_vlan_tagged)
+	 net->brnf.filter_vlan_tagged)
 
 static inline __be16 pppoe_proto(const struct sk_buff *skb)
 {
@@ -90,15 +90,15 @@
 			    sizeof(struct pppoe_hdr)));
 }
 
-#define IS_PPPOE_IP(skb) \
+#define IS_PPPOE_IP(skb, net) \
 	(skb->protocol == htons(ETH_P_PPP_SES) && \
 	 pppoe_proto(skb) == htons(PPP_IP) && \
-	 init_net.brnf.filter_pppoe_tagged)
+	 net->brnf.filter_pppoe_tagged)
 
-#define IS_PPPOE_IPV6(skb) \
+#define IS_PPPOE_IPV6(skb, net) \
 	(skb->protocol == htons(ETH_P_PPP_SES) && \
 	 pppoe_proto(skb) == htons(PPP_IPV6) && \
-	 init_net.brnf.filter_pppoe_tagged)
+	 net->brnf.filter_pppoe_tagged)
 
 /* largest possible L2 header, see br_nf_dev_queue_xmit() */
 #define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN)
@@ -408,12 +408,14 @@
 	return 0;
 }
 
-static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct net_device *dev)
+static struct net_device *brnf_get_logical_dev(struct sk_buff *skb,
+					       const struct net_device *dev,
+					       const struct net *net)
 {
 	struct net_device *vlan, *br;
 
 	br = bridge_parent(dev);
-	if (init_net.brnf.pass_vlan_indev == 0 || !skb_vlan_tag_present(skb))
+	if (net->brnf.pass_vlan_indev == 0 || !skb_vlan_tag_present(skb))
 		return br;
 
 	vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto,
@@ -423,7 +425,7 @@
 }
 
 /* Some common code for IPv4/IPv6 */
-struct net_device *setup_pre_routing(struct sk_buff *skb)
+struct net_device *setup_pre_routing(struct sk_buff *skb, const struct net *net)
 {
 	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
 
@@ -434,7 +436,7 @@
 
 	nf_bridge->in_prerouting = 1;
 	nf_bridge->physindev = skb->dev;
-	skb->dev = brnf_get_logical_dev(skb, skb->dev);
+	skb->dev = brnf_get_logical_dev(skb, skb->dev, net);
 
 	if (skb->protocol == htons(ETH_P_8021Q))
 		nf_bridge->orig_proto = BRNF_PROTO_8021Q;
@@ -469,8 +471,9 @@
 		return NF_DROP;
 	br = p->br;
 
-	if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) {
-		if (!init_net.brnf.call_ip6tables &&
+	if (IS_IPV6(skb) || IS_VLAN_IPV6(skb, state->net) ||
+	    IS_PPPOE_IPV6(skb, state->net)) {
+		if (!state->net->brnf.call_ip6tables &&
 		    !br_opt_get(br, BROPT_NF_CALL_IP6TABLES))
 			return NF_ACCEPT;
 
@@ -478,11 +481,12 @@
 		return br_nf_pre_routing_ipv6(priv, skb, state);
 	}
 
-	if (!init_net.brnf.call_iptables &&
+	if (!state->net->brnf.call_iptables &&
 	    !br_opt_get(br, BROPT_NF_CALL_IPTABLES))
 		return NF_ACCEPT;
 
-	if (!IS_IP(skb) && !IS_VLAN_IP(skb) && !IS_PPPOE_IP(skb))
+	if (!IS_IP(skb) && !IS_VLAN_IP(skb, state->net) &&
+	    !IS_PPPOE_IP(skb, state->net))
 		return NF_ACCEPT;
 
 	nf_bridge_pull_encap_header_rcsum(skb);
@@ -493,7 +497,7 @@
 	nf_bridge_put(skb->nf_bridge);
 	if (!nf_bridge_alloc(skb))
 		return NF_DROP;
-	if (!setup_pre_routing(skb))
+	if (!setup_pre_routing(skb, state->net))
 		return NF_DROP;
 
 	nf_bridge = nf_bridge_info_get(skb);
@@ -515,7 +519,7 @@
 	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
 	struct net_device *in;
 
-	if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) {
+	if (!IS_ARP(skb) && !IS_VLAN_ARP(skb, net)) {
 
 		if (skb->protocol == htons(ETH_P_IP))
 			nf_bridge->frag_max_size = IPCB(skb)->frag_max_size;
@@ -569,9 +573,11 @@
 	if (!parent)
 		return NF_DROP;
 
-	if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb))
+	if (IS_IP(skb) || IS_VLAN_IP(skb, state->net) ||
+	    IS_PPPOE_IP(skb, state->net))
 		pf = NFPROTO_IPV4;
-	else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb))
+	else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb, state->net) ||
+		 IS_PPPOE_IPV6(skb, state->net))
 		pf = NFPROTO_IPV6;
 	else
 		return NF_ACCEPT;
@@ -602,7 +608,7 @@
 		skb->protocol = htons(ETH_P_IPV6);
 
 	NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb,
-		brnf_get_logical_dev(skb, state->in),
+		brnf_get_logical_dev(skb, state->in, state->net),
 		parent,	br_nf_forward_finish);
 
 	return NF_STOLEN;
@@ -621,18 +627,18 @@
 		return NF_ACCEPT;
 	br = p->br;
 
-	if (!init_net.brnf.call_arptables &&
+	if (!state->net->brnf.call_arptables &&
 	    !br_opt_get(br, BROPT_NF_CALL_ARPTABLES))
 		return NF_ACCEPT;
 
 	if (!IS_ARP(skb)) {
-		if (!IS_VLAN_ARP(skb))
+		if (!IS_VLAN_ARP(skb, state->net))
 			return NF_ACCEPT;
 		nf_bridge_pull_encap_header(skb);
 	}
 
 	if (arp_hdr(skb)->ar_pln != 4) {
-		if (IS_VLAN_ARP(skb))
+		if (IS_VLAN_ARP(skb, state->net))
 			nf_bridge_push_encap_header(skb);
 		return NF_ACCEPT;
 	}
@@ -787,9 +793,11 @@
 	if (!realoutdev)
 		return NF_DROP;
 
-	if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb))
+	if (IS_IP(skb) || IS_VLAN_IP(skb, state->net) ||
+	    IS_PPPOE_IP(skb, state->net))
 		pf = NFPROTO_IPV4;
-	else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb))
+	else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb, state->net) ||
+		 IS_PPPOE_IPV6(skb, state->net))
 		pf = NFPROTO_IPV6;
 	else
 		return NF_ACCEPT;
@@ -1071,6 +1079,49 @@
 	brnf->pass_vlan_indev = 0;
 }
 
+static __net_init int br_netfilter_sysctl_init_net(struct net *net)
+{
+	struct ctl_table *table = brnf_table;
+
+	if (net_eq(net, &init_net))
+		return 0;
+
+	table = kmemdup(table, sizeof(brnf_table), GFP_KERNEL);
+	if (!table)
+		return -ENOMEM;
+
+	table[0].data = &net->brnf.call_arptables;
+	table[1].data = &net->brnf.call_iptables;
+	table[2].data = &net->brnf.call_ip6tables;
+	table[3].data = &net->brnf.filter_vlan_tagged;
+	table[4].data = &net->brnf.filter_pppoe_tagged;
+	table[5].data = &net->brnf.pass_vlan_indev;
+
+	net->brnf.ctl_hdr = register_net_sysctl(net, "net/bridge", table);
+	if (!net->brnf.ctl_hdr) {
+		kfree(table);
+		return -ENOMEM;
+	}
+
+	br_netfilter_sysctl_default(&net->brnf);
+
+	return 0;
+}
+
+static __net_exit void br_netfilter_sysctl_exit_net(struct net *net)
+{
+	if (net_eq(net, &init_net))
+		return;
+
+	unregister_net_sysctl_table(net->brnf.ctl_hdr);
+	kfree(net->brnf.ctl_hdr->ctl_table_arg);
+}
+
+static struct pernet_operations br_netfilter_sysctl_ops = {
+	.init = br_netfilter_sysctl_init_net,
+	.exit = br_netfilter_sysctl_exit_net,
+};
+
 static int __init br_netfilter_init(void)
 {
 	int ret;
@@ -1097,6 +1148,14 @@
 		unregister_pernet_subsys(&brnf_net_ops);
 		return -ENOMEM;
 	}
+
+	ret = register_pernet_subsys(&br_netfilter_sysctl_ops);
+	if (ret < 0) {
+		unregister_netdevice_notifier(&brnf_notifier);
+		unregister_pernet_subsys(&brnf_net_ops);
+		unregister_net_sysctl_table(init_net.brnf.ctl_hdr);
+		return ret;
+	}
 #endif
 	RCU_INIT_POINTER(nf_br_ops, &br_ops);
 	printk(KERN_NOTICE "Bridge firewalling registered\n");
@@ -1110,6 +1169,7 @@
 	unregister_pernet_subsys(&brnf_net_ops);
 #ifdef CONFIG_SYSCTL
 	unregister_net_sysctl_table(init_net.brnf.ctl_hdr);
+	unregister_pernet_subsys(&br_netfilter_sysctl_ops);
 #endif
 }
 
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 96c072e..d2220e5 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -227,7 +227,7 @@
 	nf_bridge_put(skb->nf_bridge);
 	if (!nf_bridge_alloc(skb))
 		return NF_DROP;
-	if (!setup_pre_routing(skb))
+	if (!setup_pre_routing(skb, state->net))
 		return NF_DROP;
 
 	nf_bridge = nf_bridge_info_get(skb);