#!/bin/sh

# This script demonstrates the interaction of conntrack and vrf.
# The vrf driver calls the netfilter hooks again, with oif/iif
# pointing at the VRF device.
#
# For ingress, this means the first iteration has the iifname of the
# lower/real device. In this script, that's veth0.
# The second iteration has iifname set to the vrf device, tvrf in this
# script.
#
# For egress, this is reversed: the first iteration has the vrf device,
# the second iteration is done with the lower/real/veth0 device.
#
# test_ct_zone_in demonstrates an unexpected change of nftables
# behavior caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack
# connection on VRF rcv".
#
# It was possible to assign a conntrack zone to a packet (or mark it for
# `notracking`) in the prerouting chain before conntrack, based on the
# real iif.
#
# After the change, that zone assignment is lost. Instead, the zone is
# assigned based on `iif` matching the VRF master interface (in case such
# a rule exists).
# Thus it is impossible to distinguish packets based on the original
# interface.
#
# test_masquerade_vrf and test_masquerade_veth demonstrate the problem
# that was supposed to be fixed by the commit mentioned above, to make sure
# that any fix to test case 1 won't break masquerade again.

# Exit status used whenever the test has to be skipped.
ksft_skip=4

# IP0 is configured on veth0 in $ns0, IP1 on veth0 in $ns1; PFXL is the
# prefix length used for both addresses.
IP0=172.30.30.1
IP1=172.30.30.2
PFXL=30

# Overall result; the masquerade tests set it to 1 on failure and the
# script exits with it.
ret=0

# Random suffix so the namespace names do not clash with existing ones.
sfx=$(mktemp -u "XXXXXXXX")
ns0="ns0-${sfx}"
ns1="ns1-${sfx}"
| |
# Tear down the test environment: kill whatever is still running inside
# the two namespaces (e.g. the background iperf3 server), then remove
# the namespaces themselves. Installed as the EXIT trap.
cleanup()
{
	local ns

	for ns in "$ns0" "$ns1"; do
		ip netns pids "$ns" | xargs kill 2>/dev/null
	done

	# 'ip netns del' only acts on a single name; passing both names in
	# one call would leave the second namespace behind.
	for ns in "$ns0" "$ns1"; do
		ip netns del "$ns"
	done
}
| |
# Environment setup: check for required tools, create the two namespaces,
# connect them with a veth pair, enslave the $ns0 end to the vrf device
# tvrf and start an iperf3 server in $ns1.
# Anything that cannot be set up results in a SKIP, not a failure.

if ! nft --version > /dev/null 2>&1; then
	echo "SKIP: Could not run test without nft tool"
	exit $ksft_skip
fi

if ! ip -Version > /dev/null 2>&1; then
	echo "SKIP: Could not run test without ip tool"
	exit $ksft_skip
fi

# The iperf3 server is started in the background further down. The exit
# status of a backgrounded command is always 0, so a missing binary can
# only be detected up front.
if ! command -v iperf3 > /dev/null 2>&1; then
	echo "SKIP: Could not run test without iperf3 tool"
	exit $ksft_skip
fi

if ! ip netns add "$ns0"; then
	echo "SKIP: Could not create net namespace $ns0"
	exit $ksft_skip
fi

if ! ip netns add "$ns1"; then
	echo "SKIP: Could not create net namespace $ns1"
	# The EXIT trap is not installed yet, so remove $ns0 by hand.
	ip netns del "$ns0"
	exit $ksft_skip
fi

# From here on both namespaces exist; remove them on every exit path.
trap cleanup EXIT

ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0
ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0

if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then
	echo "SKIP: Could not add veth device"
	exit $ksft_skip
fi

if ! ip -net "$ns0" li add tvrf type vrf table 9876; then
	echo "SKIP: Could not add vrf device"
	exit $ksft_skip
fi

ip -net "$ns0" li set lo up

# veth0 in $ns0 becomes a slave of the vrf device; its peer in $ns1
# stays a plain interface.
ip -net "$ns0" li set veth0 master tvrf
ip -net "$ns0" li set tvrf up
ip -net "$ns0" li set veth0 up
ip -net "$ns1" li set veth0 up

ip -net "$ns0" addr add "$IP0/$PFXL" dev veth0
ip -net "$ns1" addr add "$IP1/$PFXL" dev veth0

# Server side for the masquerade tests; killed again by cleanup().
ip netns exec "$ns1" iperf3 -s > /dev/null 2>&1 &
| |
# test vrf ingress handling.
# The incoming connection should be placed in conntrack zone 1,
# as decided by the first iteration of the ruleset.
#
# Loads a ruleset that assigns zone 1 for veth0 and zone 2 for tvrf,
# sends a single ping from $ns1 to $IP0 and then looks up the resulting
# icmp conntrack entry per zone.
# Only prints PASS/FAIL (plus a dump of the conntrack table and ruleset
# when the entry is in neither zone); it does not modify $ret.
test_ct_zone_in()
{
ip netns exec "$ns0" nft -f - <<EOF
table testct {
	chain rawpre {
		type filter hook prerouting priority raw;

		iif { veth0, tvrf } counter meta nftrace set 1
		iif veth0 counter ct zone set 1 counter return
		iif tvrf counter ct zone set 2 counter return
		ip protocol icmp counter
		notrack counter
	}

	chain rawout {
		type filter hook output priority raw;

		oif veth0 counter ct zone set 1 counter return
		oif tvrf counter ct zone set 2 counter return
		notrack counter
	}
}
EOF
	ip netns exec "$ns1" ping -W 1 -c 1 -I veth0 "$IP0" > /dev/null

	# should be in zone 1, not zone 2
	count=$(ip netns exec "$ns0" conntrack -L -s "$IP1" -d "$IP0" -p icmp --zone 1 2>/dev/null | wc -l)
	if [ "$count" -eq 1 ]; then
		echo "PASS: entry found in conntrack zone 1"
		return
	fi

	echo "FAIL: entry not found in conntrack zone 1"
	count=$(ip netns exec "$ns0" conntrack -L -s "$IP1" -d "$IP0" -p icmp --zone 2 2> /dev/null | wc -l)
	if [ "$count" -eq 1 ]; then
		echo "FAIL: entry found in zone 2 instead"
	else
		echo "FAIL: entry not in zone 1 or 2, dumping table"
		ip netns exec "$ns0" conntrack -L
		ip netns exec "$ns0" nft list ruleset
	fi
}
| |
# add masq rule that gets evaluated w. outif set to vrf device.
# This tests the first iteration of the packet through conntrack,
# oifname is the vrf device.
#
# Arguments: $1 - qdisc to attach to tvrf for the duration of the test,
#                 or "default" to leave the device's qdisc untouched.
# Sets ret=1 if the iperf3 connection fails or the rule counters do not
# have the expected values.
test_masquerade_vrf()
{
	local qdisc=$1

	if [ "$qdisc" != "default" ]; then
		# $qdisc is deliberately unquoted so that a qdisc spec
		# with parameters is still split into separate words.
		tc -net "$ns0" qdisc add dev tvrf root $qdisc
	fi

	ip netns exec "$ns0" conntrack -F 2>/dev/null

ip netns exec "$ns0" nft -f - <<EOF
flush ruleset
table ip nat {
	chain rawout {
		type filter hook output priority raw;

		oif tvrf ct state untracked counter
	}
	chain postrouting2 {
		type filter hook postrouting priority mangle;

		oif tvrf ct state untracked counter
	}
	chain postrouting {
		type nat hook postrouting priority 0;
		# NB: masquerade should always be combined with 'oif(name) bla',
		# lack of this is intentional here, we want to exercise double-snat.
		ip saddr 172.30.30.0/30 counter masquerade random
	}
}
EOF
	# No early return on failure: the qdisc added above has to be
	# removed again on every path, otherwise it stays on tvrf for the
	# tests that follow.
	if ! ip netns exec "$ns0" ip vrf exec tvrf iperf3 -t 1 -c "$IP1" >/dev/null; then
		echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device"
		ret=1
	# must also check that nat table was evaluated on second (lower device) iteration.
	elif ip netns exec "$ns0" nft list table ip nat | grep -q 'counter packets 2' &&
	     ip netns exec "$ns0" nft list table ip nat | grep -q 'untracked counter packets [1-9]'; then
		echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
	else
		echo "FAIL: vrf rules have unexpected counter value"
		ret=1
	fi

	if [ "$qdisc" != "default" ]; then
		tc -net "$ns0" qdisc del dev tvrf root
	fi
}
| |
# add masq rule that gets evaluated w. outif set to veth device.
# This tests the 2nd iteration of the packet through conntrack,
# oifname is the lower device (veth0 in this case).
#
# Sets ret=1 if the iperf3 connection fails or the masquerade rule
# counter does not have the expected value.
test_masquerade_veth()
{
	ip netns exec "$ns0" conntrack -F 2>/dev/null
ip netns exec "$ns0" nft -f - <<EOF
flush ruleset
table ip nat {
	chain postrouting {
		type nat hook postrouting priority 0;
		meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random
	}
}
EOF
	if ! ip netns exec "$ns0" ip vrf exec tvrf iperf3 -t 1 -c "$IP1" > /dev/null; then
		echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device"
		ret=1
	# must also check that nat table was evaluated on second (lower device) iteration.
	elif ip netns exec "$ns0" nft list table ip nat | grep -q 'counter packets 2'; then
		echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device"
	else
		echo "FAIL: vrf masq rule has unexpected counter value"
		ret=1
	fi
}
| |
# Run the test cases in order. The vrf masquerade test is run twice,
# once with the device's default qdisc and once with pfifo attached.
test_ct_zone_in
test_masquerade_vrf "default"
test_masquerade_vrf "pfifo"
test_masquerade_veth

# $ret is 0 unless one of the masquerade tests set it to 1.
exit $ret