| // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB |
| /* Copyright (c) 2021 Mellanox Technologies. */ |
| |
| #include <net/fib_notifier.h> |
| #include <net/nexthop.h> |
| #include "tc_tun_encap.h" |
| #include "en_tc.h" |
| #include "tc_tun.h" |
| #include "rep/tc.h" |
| #include "diag/en_tc_tracepoint.h" |
| |
| enum { |
| MLX5E_ROUTE_ENTRY_VALID = BIT(0), |
| }; |
| |
| struct mlx5e_route_key { |
| int ip_version; |
| union { |
| __be32 v4; |
| struct in6_addr v6; |
| } endpoint_ip; |
| }; |
| |
| struct mlx5e_route_entry { |
| struct mlx5e_route_key key; |
| struct list_head encap_entries; |
| struct list_head decap_flows; |
| u32 flags; |
| struct hlist_node hlist; |
| refcount_t refcnt; |
| int tunnel_dev_index; |
| struct rcu_head rcu; |
| }; |
| |
| struct mlx5e_tc_tun_encap { |
| struct mlx5e_priv *priv; |
| struct notifier_block fib_nb; |
| spinlock_t route_lock; /* protects route_tbl */ |
| unsigned long route_tbl_last_update; |
| DECLARE_HASHTABLE(route_tbl, 8); |
| }; |
| |
| static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r) |
| { |
| return r->flags & MLX5E_ROUTE_ENTRY_VALID; |
| } |
| |
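/* Parse the outer IP addresses of a tunnel decap flow from its match spec into
 * a newly allocated rx_tun_attr. The TUN_RX flag is set only when both source
 * and destination addresses are present, since both are needed for the route
 * lookup that resolves the tunnel endpoint.
 */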
| int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow, |
| struct mlx5_flow_spec *spec) |
| { |
| struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr; |
| struct mlx5_rx_tun_attr *tun_attr; |
| void *daddr, *saddr; |
| u8 ip_version; |
| |
| tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL); |
| if (!tun_attr) |
| return -ENOMEM; |
| |
| esw_attr->rx_tun_attr = tun_attr; |
| ip_version = mlx5e_tc_get_ip_version(spec, true); |
| |
| if (ip_version == 4) { |
| daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value, |
| outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); |
| saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value, |
| outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); |
| tun_attr->dst_ip.v4 = *(__be32 *)daddr; |
| tun_attr->src_ip.v4 = *(__be32 *)saddr; |
| if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4) |
| return 0; |
| } |
| #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) |
| else if (ip_version == 6) { |
| int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6); |
| struct in6_addr zerov6 = {}; |
| |
| daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value, |
| outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6); |
| saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value, |
| outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6); |
| memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size); |
| memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size); |
| if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) || |
| !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6))) |
| return 0; |
| } |
| #endif |
| /* Only set the flag if both src and dst ip addresses exist. They are |
| * required to establish routing. |
| */ |
| flow_flag_set(flow, TUN_RX); |
| flow->attr->tun_ip_version = ip_version; |
| return 0; |
| } |
| |
| static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr) |
| { |
| bool all_flow_encaps_valid = true; |
| int i; |
| |
| /* Flow can be associated with multiple encap entries. |
| * Before offloading the flow verify that all of them have |
| * a valid neighbour. |
| */ |
| for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { |
| if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)) |
| continue; |
| if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) { |
| all_flow_encaps_valid = false; |
| break; |
| } |
| } |
| |
| return all_flow_encaps_valid; |
| } |
| |
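/* Called when the neighbour of the encap entry becomes connected. Offload the
 * cached encapsulation header as a packet reformat context and move every flow
 * on @flow_list whose encap destinations are all valid from its slow path rule
 * to the fast path (encap) rule.
 */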
| void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, |
| struct mlx5e_encap_entry *e, |
| struct list_head *flow_list) |
| { |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| struct mlx5_pkt_reformat_params reformat_params; |
| struct mlx5_esw_flow_attr *esw_attr; |
| struct mlx5_flow_handle *rule; |
| struct mlx5_flow_attr *attr; |
| struct mlx5_flow_spec *spec; |
| struct mlx5e_tc_flow *flow; |
| int err; |
| |
| if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE) |
| return; |
| |
| memset(&reformat_params, 0, sizeof(reformat_params)); |
| reformat_params.type = e->reformat_type; |
| reformat_params.size = e->encap_size; |
| reformat_params.data = e->encap_header; |
| e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, |
| &reformat_params, |
| MLX5_FLOW_NAMESPACE_FDB); |
| if (IS_ERR(e->pkt_reformat)) { |
| mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n", |
| PTR_ERR(e->pkt_reformat)); |
| return; |
| } |
| e->flags |= MLX5_ENCAP_ENTRY_VALID; |
| mlx5e_rep_queue_neigh_stats_work(priv); |
| |
| list_for_each_entry(flow, flow_list, tmp_list) { |
| if (!mlx5e_is_offloaded_flow(flow)) |
| continue; |
| attr = flow->attr; |
| esw_attr = attr->esw_attr; |
| spec = &attr->parse_attr->spec; |
| |
| esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat; |
| esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; |
| |
| /* Do not offload flows with unresolved neighbors */ |
| if (!mlx5e_tc_flow_all_encaps_valid(esw_attr)) |
| continue; |
| /* update from slow path rule to encap rule */ |
| rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr); |
| if (IS_ERR(rule)) { |
| err = PTR_ERR(rule); |
| mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", |
| err); |
| continue; |
| } |
| |
| mlx5e_tc_unoffload_from_slow_path(esw, flow); |
| flow->rule[0] = rule; |
| /* was unset when slow path rule removed */ |
| flow_flag_set(flow, OFFLOADED); |
| } |
| } |
| |
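/* Called when the neighbour of the encap entry becomes invalid. Move every
 * offloaded flow on @flow_list back to its slow path rule, clear the encap
 * valid bit on the flow destinations and release the packet reformat context.
 */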
| void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, |
| struct mlx5e_encap_entry *e, |
| struct list_head *flow_list) |
| { |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| struct mlx5_esw_flow_attr *esw_attr; |
| struct mlx5_flow_handle *rule; |
| struct mlx5_flow_attr *attr; |
| struct mlx5_flow_spec *spec; |
| struct mlx5e_tc_flow *flow; |
| int err; |
| |
| list_for_each_entry(flow, flow_list, tmp_list) { |
| if (!mlx5e_is_offloaded_flow(flow)) |
| continue; |
| attr = flow->attr; |
| esw_attr = attr->esw_attr; |
| spec = &attr->parse_attr->spec; |
| |
| /* update from encap rule to slow path rule */ |
| rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); |
| /* mark the flow's encap dest as non-valid */ |
| esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; |
| |
| if (IS_ERR(rule)) { |
| err = PTR_ERR(rule); |
| mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n", |
| err); |
| continue; |
| } |
| |
| mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); |
| flow->rule[0] = rule; |
| /* was unset when fast path rule removed */ |
| flow_flag_set(flow, OFFLOADED); |
| } |
| |
	/* The encap entry is known to be valid at this point; clear the flag
	 * and release its packet reformat context.
	 */
| e->flags &= ~MLX5_ENCAP_ENTRY_VALID; |
| mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); |
| } |
| |
| static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow, |
| struct list_head *flow_list, |
| int index) |
| { |
| if (IS_ERR(mlx5e_flow_get(flow))) |
| return; |
| wait_for_completion(&flow->init_done); |
| |
| flow->tmp_entry_index = index; |
| list_add(&flow->tmp_list, flow_list); |
| } |
| |
| /* Takes reference to all flows attached to encap and adds the flows to |
| * flow_list using 'tmp_list' list_head in mlx5e_tc_flow. |
| */ |
| void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list) |
| { |
| struct encap_flow_item *efi; |
| struct mlx5e_tc_flow *flow; |
| |
| list_for_each_entry(efi, &e->flows, list) { |
| flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); |
| mlx5e_take_tmp_flow(flow, flow_list, efi->index); |
| } |
| } |
| |
| /* Takes reference to all flows attached to route and adds the flows to |
| * flow_list using 'tmp_list' list_head in mlx5e_tc_flow. |
| */ |
| static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r, |
| struct list_head *flow_list) |
| { |
| struct mlx5e_tc_flow *flow; |
| |
| list_for_each_entry(flow, &r->decap_flows, decap_routes) |
| mlx5e_take_tmp_flow(flow, flow_list, 0); |
| } |
| |
| typedef bool (match_cb)(struct mlx5e_encap_entry *); |
| |
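/* Iterate nhe->encap_list under RCU starting after @e (or from the list head
 * when @e is NULL), take a reference to the first entry whose refcount can be
 * taken, wait for its initialization to complete and return it if @match
 * accepts it. The reference to the starting entry @e is always released.
 */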
| static struct mlx5e_encap_entry * |
| mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe, |
| struct mlx5e_encap_entry *e, |
| match_cb match) |
| { |
| struct mlx5e_encap_entry *next = NULL; |
| |
| retry: |
| rcu_read_lock(); |
| |
| /* find encap with non-zero reference counter value */ |
| for (next = e ? |
| list_next_or_null_rcu(&nhe->encap_list, |
| &e->encap_list, |
| struct mlx5e_encap_entry, |
| encap_list) : |
| list_first_or_null_rcu(&nhe->encap_list, |
| struct mlx5e_encap_entry, |
| encap_list); |
| next; |
| next = list_next_or_null_rcu(&nhe->encap_list, |
| &next->encap_list, |
| struct mlx5e_encap_entry, |
| encap_list)) |
| if (mlx5e_encap_take(next)) |
| break; |
| |
| rcu_read_unlock(); |
| |
| /* release starting encap */ |
| if (e) |
| mlx5e_encap_put(netdev_priv(e->out_dev), e); |
| if (!next) |
| return next; |
| |
| /* wait for encap to be fully initialized */ |
| wait_for_completion(&next->res_ready); |
	/* continue searching if the encap entry does not match after initialization completes */
| if (!match(next)) { |
| e = next; |
| goto retry; |
| } |
| |
| return next; |
| } |
| |
| static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e) |
| { |
| return e->flags & MLX5_ENCAP_ENTRY_VALID; |
| } |
| |
| static struct mlx5e_encap_entry * |
| mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe, |
| struct mlx5e_encap_entry *e) |
| { |
| return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid); |
| } |
| |
| static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e) |
| { |
| return e->compl_result >= 0; |
| } |
| |
| struct mlx5e_encap_entry * |
| mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe, |
| struct mlx5e_encap_entry *e) |
| { |
| return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized); |
| } |
| |
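/* Walk all valid encap entries attached to @nhe and check whether any of their
 * offloaded flows updated its hardware counter since the last report. If so,
 * record the time and poke the kernel neighbour entry so it is not aged out
 * while traffic is still flowing through the tunnel.
 */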
| void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) |
| { |
| struct mlx5e_neigh *m_neigh = &nhe->m_neigh; |
| struct mlx5e_encap_entry *e = NULL; |
| struct mlx5e_tc_flow *flow; |
| struct mlx5_fc *counter; |
| struct neigh_table *tbl; |
| bool neigh_used = false; |
| struct neighbour *n; |
| u64 lastuse; |
| |
| if (m_neigh->family == AF_INET) |
| tbl = &arp_tbl; |
| #if IS_ENABLED(CONFIG_IPV6) |
| else if (m_neigh->family == AF_INET6) |
| tbl = ipv6_stub->nd_tbl; |
| #endif |
| else |
| return; |
| |
| /* mlx5e_get_next_valid_encap() releases previous encap before returning |
| * next one. |
| */ |
| while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) { |
| struct mlx5e_priv *priv = netdev_priv(e->out_dev); |
| struct encap_flow_item *efi, *tmp; |
| struct mlx5_eswitch *esw; |
| LIST_HEAD(flow_list); |
| |
| esw = priv->mdev->priv.eswitch; |
| mutex_lock(&esw->offloads.encap_tbl_lock); |
| list_for_each_entry_safe(efi, tmp, &e->flows, list) { |
| flow = container_of(efi, struct mlx5e_tc_flow, |
| encaps[efi->index]); |
| if (IS_ERR(mlx5e_flow_get(flow))) |
| continue; |
| list_add(&flow->tmp_list, &flow_list); |
| |
| if (mlx5e_is_offloaded_flow(flow)) { |
| counter = mlx5e_tc_get_counter(flow); |
| lastuse = mlx5_fc_query_lastuse(counter); |
| if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) { |
| neigh_used = true; |
| break; |
| } |
| } |
| } |
| mutex_unlock(&esw->offloads.encap_tbl_lock); |
| |
| mlx5e_put_flow_list(priv, &flow_list); |
| if (neigh_used) { |
| /* release current encap before breaking the loop */ |
| mlx5e_encap_put(priv, e); |
| break; |
| } |
| } |
| |
| trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used); |
| |
| if (neigh_used) { |
| nhe->reported_lastuse = jiffies; |
| |
| /* find the relevant neigh according to the cached device and |
| * dst ip pair |
| */ |
| n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev)); |
| if (!n) |
| return; |
| |
| neigh_event_send(n, NULL); |
| neigh_release(n); |
| } |
| } |
| |
| static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) |
| { |
| WARN_ON(!list_empty(&e->flows)); |
| |
| if (e->compl_result > 0) { |
| mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); |
| |
| if (e->flags & MLX5_ENCAP_ENTRY_VALID) |
| mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); |
| } |
| |
| kfree(e->tun_info); |
| kfree(e->encap_header); |
| kfree_rcu(e, rcu); |
| } |
| |
| static void mlx5e_decap_dealloc(struct mlx5e_priv *priv, |
| struct mlx5e_decap_entry *d) |
| { |
| WARN_ON(!list_empty(&d->flows)); |
| |
| if (!d->compl_result) |
| mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat); |
| |
| kfree_rcu(d, rcu); |
| } |
| |
| void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) |
| { |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| |
| if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock)) |
| return; |
| list_del(&e->route_list); |
| hash_del_rcu(&e->encap_hlist); |
| mutex_unlock(&esw->offloads.encap_tbl_lock); |
| |
| mlx5e_encap_dealloc(priv, e); |
| } |
| |
| static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d) |
| { |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| |
| if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock)) |
| return; |
| hash_del_rcu(&d->hlist); |
| mutex_unlock(&esw->offloads.decap_tbl_lock); |
| |
| mlx5e_decap_dealloc(priv, d); |
| } |
| |
| static void mlx5e_detach_encap_route(struct mlx5e_priv *priv, |
| struct mlx5e_tc_flow *flow, |
| int out_index); |
| |
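/* Detach @flow from its encap entry at @out_index and, if this was the last
 * reference, remove the entry from the hash table and free it together with
 * its hardware resources.
 */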
| void mlx5e_detach_encap(struct mlx5e_priv *priv, |
| struct mlx5e_tc_flow *flow, int out_index) |
| { |
| struct mlx5e_encap_entry *e = flow->encaps[out_index].e; |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| |
| if (flow->attr->esw_attr->dests[out_index].flags & |
| MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) |
| mlx5e_detach_encap_route(priv, flow, out_index); |
| |
| /* flow wasn't fully initialized */ |
| if (!e) |
| return; |
| |
| mutex_lock(&esw->offloads.encap_tbl_lock); |
| list_del(&flow->encaps[out_index].list); |
| flow->encaps[out_index].e = NULL; |
| if (!refcount_dec_and_test(&e->refcnt)) { |
| mutex_unlock(&esw->offloads.encap_tbl_lock); |
| return; |
| } |
| list_del(&e->route_list); |
| hash_del_rcu(&e->encap_hlist); |
| mutex_unlock(&esw->offloads.encap_tbl_lock); |
| |
| mlx5e_encap_dealloc(priv, e); |
| } |
| |
| void mlx5e_detach_decap(struct mlx5e_priv *priv, |
| struct mlx5e_tc_flow *flow) |
| { |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| struct mlx5e_decap_entry *d = flow->decap_reformat; |
| |
| if (!d) |
| return; |
| |
| mutex_lock(&esw->offloads.decap_tbl_lock); |
| list_del(&flow->l3_to_l2_reformat); |
| flow->decap_reformat = NULL; |
| |
| if (!refcount_dec_and_test(&d->refcnt)) { |
| mutex_unlock(&esw->offloads.decap_tbl_lock); |
| return; |
| } |
| hash_del_rcu(&d->hlist); |
| mutex_unlock(&esw->offloads.decap_tbl_lock); |
| |
| mlx5e_decap_dealloc(priv, d); |
| } |
| |
| bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a, |
| struct mlx5e_encap_key *b) |
| { |
| return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 && |
| a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type; |
| } |
| |
| static int cmp_decap_info(struct mlx5e_decap_key *a, |
| struct mlx5e_decap_key *b) |
| { |
| return memcmp(&a->key, &b->key, sizeof(b->key)); |
| } |
| |
| static int hash_encap_info(struct mlx5e_encap_key *key) |
| { |
| return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), |
| key->tc_tunnel->tunnel_type); |
| } |
| |
| static int hash_decap_info(struct mlx5e_decap_key *key) |
| { |
| return jhash(&key->key, sizeof(key->key), 0); |
| } |
| |
| bool mlx5e_encap_take(struct mlx5e_encap_entry *e) |
| { |
| return refcount_inc_not_zero(&e->refcnt); |
| } |
| |
| static bool mlx5e_decap_take(struct mlx5e_decap_entry *e) |
| { |
| return refcount_inc_not_zero(&e->refcnt); |
| } |
| |
| static struct mlx5e_encap_entry * |
| mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key, |
| uintptr_t hash_key) |
| { |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| struct mlx5e_encap_key e_key; |
| struct mlx5e_encap_entry *e; |
| |
| hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, |
| encap_hlist, hash_key) { |
| e_key.ip_tun_key = &e->tun_info->key; |
| e_key.tc_tunnel = e->tunnel; |
| if (e->tunnel->encap_info_equal(&e_key, key) && |
| mlx5e_encap_take(e)) |
| return e; |
| } |
| |
| return NULL; |
| } |
| |
| static struct mlx5e_decap_entry * |
| mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key, |
| uintptr_t hash_key) |
| { |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| struct mlx5e_decap_key r_key; |
| struct mlx5e_decap_entry *e; |
| |
| hash_for_each_possible_rcu(esw->offloads.decap_tbl, e, |
| hlist, hash_key) { |
| r_key = e->key; |
| if (!cmp_decap_info(&r_key, key) && |
| mlx5e_decap_take(e)) |
| return e; |
| } |
| return NULL; |
| } |
| |
| struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info) |
| { |
| size_t tun_size = sizeof(*tun_info) + tun_info->options_len; |
| |
| return kmemdup(tun_info, tun_size, GFP_KERNEL); |
| } |
| |
| static bool is_duplicated_encap_entry(struct mlx5e_priv *priv, |
| struct mlx5e_tc_flow *flow, |
| int out_index, |
| struct mlx5e_encap_entry *e, |
| struct netlink_ext_ack *extack) |
| { |
| int i; |
| |
| for (i = 0; i < out_index; i++) { |
| if (flow->encaps[i].e != e) |
| continue; |
| NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action"); |
| netdev_err(priv->netdev, "can't duplicate encap action\n"); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw, |
| struct mlx5_flow_attr *attr, |
| struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, |
| struct net_device *out_dev, |
| int route_dev_ifindex, |
| int out_index) |
| { |
| struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; |
| struct net_device *route_dev; |
| u16 vport_num; |
| int err = 0; |
| u32 data; |
| |
| route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex); |
| |
| if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops || |
| !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) |
| goto out; |
| |
| err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num); |
| if (err) |
| goto out; |
| |
| attr->dest_chain = 0; |
| attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; |
| esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE; |
| data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch, |
| vport_num); |
| err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts, |
| MLX5_FLOW_NAMESPACE_FDB, |
| VPORT_TO_REG, data); |
| if (err >= 0) { |
| esw_attr->dests[out_index].src_port_rewrite_act_id = err; |
| err = 0; |
| } |
| |
| out: |
| if (route_dev) |
| dev_put(route_dev); |
| return err; |
| } |
| |
| static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw, |
| struct mlx5_esw_flow_attr *attr, |
| struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, |
| struct net_device *out_dev, |
| int route_dev_ifindex, |
| int out_index) |
| { |
| int act_id = attr->dests[out_index].src_port_rewrite_act_id; |
| struct net_device *route_dev; |
| u16 vport_num; |
| int err = 0; |
| u32 data; |
| |
| route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex); |
| |
| if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops || |
| !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) { |
| err = -ENODEV; |
| goto out; |
| } |
| |
| err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num); |
| if (err) |
| goto out; |
| |
| data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch, |
| vport_num); |
| mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data); |
| |
| out: |
| if (route_dev) |
| dev_put(route_dev); |
| return err; |
| } |
| |
| static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv) |
| { |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| struct mlx5_rep_uplink_priv *uplink_priv; |
| struct mlx5e_rep_priv *uplink_rpriv; |
| struct mlx5e_tc_tun_encap *encap; |
| unsigned int ret; |
| |
| uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); |
| uplink_priv = &uplink_rpriv->uplink_priv; |
| encap = uplink_priv->encap; |
| |
| spin_lock_bh(&encap->route_lock); |
| ret = encap->route_tbl_last_update; |
| spin_unlock_bh(&encap->route_lock); |
| return ret; |
| } |
| |
| static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, |
| struct mlx5e_tc_flow *flow, |
| struct mlx5e_encap_entry *e, |
| bool new_encap_entry, |
| unsigned long tbl_time_before, |
| int out_index); |
| |
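/* Find an existing encap entry matching the flow's tunnel key or create a new
 * one. The tunnel header is built outside of encap_tbl_lock; other flows that
 * race on the same entry wait on res_ready for the result. On success the flow
 * is linked to the entry and *encap_valid reports whether the neighbour is
 * already resolved, i.e. whether the entry has a packet reformat context.
 */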
| int mlx5e_attach_encap(struct mlx5e_priv *priv, |
| struct mlx5e_tc_flow *flow, |
| struct net_device *mirred_dev, |
| int out_index, |
| struct netlink_ext_ack *extack, |
| struct net_device **encap_dev, |
| bool *encap_valid) |
| { |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| struct mlx5e_tc_flow_parse_attr *parse_attr; |
| struct mlx5_flow_attr *attr = flow->attr; |
| const struct ip_tunnel_info *tun_info; |
| unsigned long tbl_time_before = 0; |
| struct mlx5e_encap_entry *e; |
| struct mlx5e_encap_key key; |
| bool entry_created = false; |
| unsigned short family; |
| uintptr_t hash_key; |
| int err = 0; |
| |
| parse_attr = attr->parse_attr; |
| tun_info = parse_attr->tun_info[out_index]; |
| family = ip_tunnel_info_af(tun_info); |
| key.ip_tun_key = &tun_info->key; |
| key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev); |
| if (!key.tc_tunnel) { |
| NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel"); |
| return -EOPNOTSUPP; |
| } |
| |
| hash_key = hash_encap_info(&key); |
| |
| mutex_lock(&esw->offloads.encap_tbl_lock); |
| e = mlx5e_encap_get(priv, &key, hash_key); |
| |
	/* Found an existing entry; it may still be initializing, so wait for
	 * completion and check the result before reusing it.
	 */
| if (e) { |
| /* Check that entry was not already attached to this flow */ |
| if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) { |
| err = -EOPNOTSUPP; |
| goto out_err; |
| } |
| |
| mutex_unlock(&esw->offloads.encap_tbl_lock); |
| wait_for_completion(&e->res_ready); |
| |
| /* Protect against concurrent neigh update. */ |
| mutex_lock(&esw->offloads.encap_tbl_lock); |
| if (e->compl_result < 0) { |
| err = -EREMOTEIO; |
| goto out_err; |
| } |
| goto attach_flow; |
| } |
| |
| e = kzalloc(sizeof(*e), GFP_KERNEL); |
| if (!e) { |
| err = -ENOMEM; |
| goto out_err; |
| } |
| |
| refcount_set(&e->refcnt, 1); |
| init_completion(&e->res_ready); |
| entry_created = true; |
| INIT_LIST_HEAD(&e->route_list); |
| |
| tun_info = mlx5e_dup_tun_info(tun_info); |
| if (!tun_info) { |
| err = -ENOMEM; |
| goto out_err_init; |
| } |
| e->tun_info = tun_info; |
| err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); |
| if (err) |
| goto out_err_init; |
| |
| INIT_LIST_HEAD(&e->flows); |
| hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); |
| tbl_time_before = mlx5e_route_tbl_get_last_update(priv); |
| mutex_unlock(&esw->offloads.encap_tbl_lock); |
| |
| if (family == AF_INET) |
| err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e); |
| else if (family == AF_INET6) |
| err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e); |
| |
| /* Protect against concurrent neigh update. */ |
| mutex_lock(&esw->offloads.encap_tbl_lock); |
| complete_all(&e->res_ready); |
| if (err) { |
| e->compl_result = err; |
| goto out_err; |
| } |
| e->compl_result = 1; |
| |
| attach_flow: |
| err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before, |
| out_index); |
| if (err) |
| goto out_err; |
| |
| flow->encaps[out_index].e = e; |
| list_add(&flow->encaps[out_index].list, &e->flows); |
| flow->encaps[out_index].index = out_index; |
| *encap_dev = e->out_dev; |
| if (e->flags & MLX5_ENCAP_ENTRY_VALID) { |
| attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat; |
| attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; |
| *encap_valid = true; |
| } else { |
| *encap_valid = false; |
| } |
| mutex_unlock(&esw->offloads.encap_tbl_lock); |
| |
| return err; |
| |
| out_err: |
| mutex_unlock(&esw->offloads.encap_tbl_lock); |
| if (e) |
| mlx5e_encap_put(priv, e); |
| return err; |
| |
| out_err_init: |
| mutex_unlock(&esw->offloads.encap_tbl_lock); |
| kfree(tun_info); |
| kfree(e); |
| return err; |
| } |
| |
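/* Find or create a decap entry keyed by the Ethernet header that the
 * L3_TUNNEL_TO_L2 reformat restores, and attach the flow to it. As with encap
 * entries, the firmware object is created outside of decap_tbl_lock and
 * waiters synchronize on res_ready.
 */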
| int mlx5e_attach_decap(struct mlx5e_priv *priv, |
| struct mlx5e_tc_flow *flow, |
| struct netlink_ext_ack *extack) |
| { |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; |
| struct mlx5_pkt_reformat_params reformat_params; |
| struct mlx5e_tc_flow_parse_attr *parse_attr; |
| struct mlx5e_decap_entry *d; |
| struct mlx5e_decap_key key; |
| uintptr_t hash_key; |
| int err = 0; |
| |
| parse_attr = flow->attr->parse_attr; |
| if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) { |
| NL_SET_ERR_MSG_MOD(extack, |
| "encap header larger than max supported"); |
| return -EOPNOTSUPP; |
| } |
| |
| key.key = parse_attr->eth; |
| hash_key = hash_decap_info(&key); |
| mutex_lock(&esw->offloads.decap_tbl_lock); |
| d = mlx5e_decap_get(priv, &key, hash_key); |
| if (d) { |
| mutex_unlock(&esw->offloads.decap_tbl_lock); |
| wait_for_completion(&d->res_ready); |
| mutex_lock(&esw->offloads.decap_tbl_lock); |
| if (d->compl_result) { |
| err = -EREMOTEIO; |
| goto out_free; |
| } |
| goto found; |
| } |
| |
| d = kzalloc(sizeof(*d), GFP_KERNEL); |
| if (!d) { |
| err = -ENOMEM; |
| goto out_err; |
| } |
| |
| d->key = key; |
| refcount_set(&d->refcnt, 1); |
| init_completion(&d->res_ready); |
| INIT_LIST_HEAD(&d->flows); |
| hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key); |
| mutex_unlock(&esw->offloads.decap_tbl_lock); |
| |
| memset(&reformat_params, 0, sizeof(reformat_params)); |
| reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; |
| reformat_params.size = sizeof(parse_attr->eth); |
| reformat_params.data = &parse_attr->eth; |
| d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, |
| &reformat_params, |
| MLX5_FLOW_NAMESPACE_FDB); |
| if (IS_ERR(d->pkt_reformat)) { |
| err = PTR_ERR(d->pkt_reformat); |
| d->compl_result = err; |
| } |
| mutex_lock(&esw->offloads.decap_tbl_lock); |
| complete_all(&d->res_ready); |
| if (err) |
| goto out_free; |
| |
| found: |
| flow->decap_reformat = d; |
| attr->decap_pkt_reformat = d->pkt_reformat; |
| list_add(&flow->l3_to_l2_reformat, &d->flows); |
| mutex_unlock(&esw->offloads.decap_tbl_lock); |
| return 0; |
| |
| out_free: |
| mutex_unlock(&esw->offloads.decap_tbl_lock); |
| mlx5e_decap_put(priv, d); |
| return err; |
| |
| out_err: |
| mutex_unlock(&esw->offloads.decap_tbl_lock); |
| return err; |
| } |
| |
| static int cmp_route_info(struct mlx5e_route_key *a, |
| struct mlx5e_route_key *b) |
| { |
| if (a->ip_version == 4 && b->ip_version == 4) |
| return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4, |
| sizeof(a->endpoint_ip.v4)); |
| else if (a->ip_version == 6 && b->ip_version == 6) |
| return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6, |
| sizeof(a->endpoint_ip.v6)); |
| return 1; |
| } |
| |
| static u32 hash_route_info(struct mlx5e_route_key *key) |
| { |
| if (key->ip_version == 4) |
| return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0); |
| return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0); |
| } |
| |
| static void mlx5e_route_dealloc(struct mlx5e_priv *priv, |
| struct mlx5e_route_entry *r) |
| { |
| WARN_ON(!list_empty(&r->decap_flows)); |
| WARN_ON(!list_empty(&r->encap_entries)); |
| |
| kfree_rcu(r, rcu); |
| } |
| |
| static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r) |
| { |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| |
| if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock)) |
| return; |
| |
| hash_del_rcu(&r->hlist); |
| mutex_unlock(&esw->offloads.encap_tbl_lock); |
| |
| mlx5e_route_dealloc(priv, r); |
| } |
| |
| static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r) |
| { |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| |
| lockdep_assert_held(&esw->offloads.encap_tbl_lock); |
| |
| if (!refcount_dec_and_test(&r->refcnt)) |
| return; |
| hash_del_rcu(&r->hlist); |
| mlx5e_route_dealloc(priv, r); |
| } |
| |
| static struct mlx5e_route_entry * |
| mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key, |
| u32 hash_key) |
| { |
| struct mlx5e_route_key r_key; |
| struct mlx5e_route_entry *r; |
| |
| hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) { |
| r_key = r->key; |
| if (!cmp_route_info(&r_key, key) && |
| refcount_inc_not_zero(&r->refcnt)) |
| return r; |
| } |
| return NULL; |
| } |
| |
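/* Look up the route entry for a tunnel endpoint in the uplink route table or
 * create a new one. Returns an error if an existing entry is no longer valid.
 * When a new entry is added, *route_tbl_change_time is set to the table
 * timestamp so callers can detect FIB updates that raced with the insertion.
 */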
| static struct mlx5e_route_entry * |
| mlx5e_route_get_create(struct mlx5e_priv *priv, |
| struct mlx5e_route_key *key, |
| int tunnel_dev_index, |
| unsigned long *route_tbl_change_time) |
| { |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| struct mlx5_rep_uplink_priv *uplink_priv; |
| struct mlx5e_rep_priv *uplink_rpriv; |
| struct mlx5e_tc_tun_encap *encap; |
| struct mlx5e_route_entry *r; |
| u32 hash_key; |
| |
| uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); |
| uplink_priv = &uplink_rpriv->uplink_priv; |
| encap = uplink_priv->encap; |
| |
| hash_key = hash_route_info(key); |
| spin_lock_bh(&encap->route_lock); |
| r = mlx5e_route_get(encap, key, hash_key); |
| spin_unlock_bh(&encap->route_lock); |
| if (r) { |
| if (!mlx5e_route_entry_valid(r)) { |
| mlx5e_route_put_locked(priv, r); |
| return ERR_PTR(-EINVAL); |
| } |
| return r; |
| } |
| |
| r = kzalloc(sizeof(*r), GFP_KERNEL); |
| if (!r) |
| return ERR_PTR(-ENOMEM); |
| |
| r->key = *key; |
| r->flags |= MLX5E_ROUTE_ENTRY_VALID; |
| r->tunnel_dev_index = tunnel_dev_index; |
| refcount_set(&r->refcnt, 1); |
| INIT_LIST_HEAD(&r->decap_flows); |
| INIT_LIST_HEAD(&r->encap_entries); |
| |
| spin_lock_bh(&encap->route_lock); |
| *route_tbl_change_time = encap->route_tbl_last_update; |
| hash_add(encap->route_tbl, &r->hlist, hash_key); |
| spin_unlock_bh(&encap->route_lock); |
| |
| return r; |
| } |
| |
| static struct mlx5e_route_entry * |
| mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key) |
| { |
| u32 hash_key = hash_route_info(key); |
| struct mlx5e_route_entry *r; |
| |
| spin_lock_bh(&encap->route_lock); |
| encap->route_tbl_last_update = jiffies; |
| r = mlx5e_route_get(encap, key, hash_key); |
| spin_unlock_bh(&encap->route_lock); |
| |
| return r; |
| } |
| |
| struct mlx5e_tc_fib_event_data { |
| struct work_struct work; |
| unsigned long event; |
| struct mlx5e_route_entry *r; |
| struct net_device *ul_dev; |
| }; |
| |
| static void mlx5e_tc_fib_event_work(struct work_struct *work); |
| static struct mlx5e_tc_fib_event_data * |
| mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags) |
| { |
| struct mlx5e_tc_fib_event_data *fib_work; |
| |
| fib_work = kzalloc(sizeof(*fib_work), flags); |
| if (WARN_ON(!fib_work)) |
| return NULL; |
| |
| INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work); |
| fib_work->event = event; |
| fib_work->ul_dev = ul_dev; |
| |
| return fib_work; |
| } |
| |
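/* Queue a deferred route update for @r on the uplink workqueue. Used when
 * attaching a flow detects that the route table changed underneath it and a
 * FIB event may have been missed for the new entry.
 */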
| static int |
| mlx5e_route_enqueue_update(struct mlx5e_priv *priv, |
| struct mlx5e_route_entry *r, |
| unsigned long event) |
| { |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| struct mlx5e_tc_fib_event_data *fib_work; |
| struct mlx5e_rep_priv *uplink_rpriv; |
| struct net_device *ul_dev; |
| |
| uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); |
| ul_dev = uplink_rpriv->netdev; |
| |
| fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL); |
| if (!fib_work) |
| return -ENOMEM; |
| |
| dev_hold(ul_dev); |
| refcount_inc(&r->refcnt); |
| fib_work->r = r; |
| queue_work(priv->wq, &fib_work->work); |
| |
| return 0; |
| } |
| |
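/* Resolve the route for a tunnel decap flow and attach the flow to the
 * corresponding route entry so it can be updated on FIB events affecting the
 * tunnel endpoint.
 */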
| int mlx5e_attach_decap_route(struct mlx5e_priv *priv, |
| struct mlx5e_tc_flow *flow) |
| { |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| unsigned long tbl_time_before, tbl_time_after; |
| struct mlx5e_tc_flow_parse_attr *parse_attr; |
| struct mlx5_flow_attr *attr = flow->attr; |
| struct mlx5_esw_flow_attr *esw_attr; |
| struct mlx5e_route_entry *r; |
| struct mlx5e_route_key key; |
| int err = 0; |
| |
| esw_attr = attr->esw_attr; |
| parse_attr = attr->parse_attr; |
| mutex_lock(&esw->offloads.encap_tbl_lock); |
| if (!esw_attr->rx_tun_attr) |
| goto out; |
| |
| tbl_time_before = mlx5e_route_tbl_get_last_update(priv); |
| tbl_time_after = tbl_time_before; |
| err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr); |
| if (err || !esw_attr->rx_tun_attr->decap_vport) |
| goto out; |
| |
| key.ip_version = attr->tun_ip_version; |
| if (key.ip_version == 4) |
| key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4; |
| else |
| key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6; |
| |
| r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex, |
| &tbl_time_after); |
| if (IS_ERR(r)) { |
| err = PTR_ERR(r); |
| goto out; |
| } |
	/* Routing changed concurrently. The FIB event handler might have
	 * missed the new entry, so schedule an update.
	 */
| if (tbl_time_before != tbl_time_after) { |
| err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE); |
| if (err) { |
| mlx5e_route_put_locked(priv, r); |
| goto out; |
| } |
| } |
| |
| flow->decap_route = r; |
| list_add(&flow->decap_routes, &r->decap_flows); |
| mutex_unlock(&esw->offloads.encap_tbl_lock); |
| return 0; |
| |
| out: |
| mutex_unlock(&esw->offloads.encap_tbl_lock); |
| return err; |
| } |
| |
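/* Attach an encap flow to the route entry of its tunnel source address. Only
 * flows that go through a VF tunnel device (source port rewrite) are tracked;
 * for all other flows this is a no-op.
 */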
| static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, |
| struct mlx5e_tc_flow *flow, |
| struct mlx5e_encap_entry *e, |
| bool new_encap_entry, |
| unsigned long tbl_time_before, |
| int out_index) |
| { |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| unsigned long tbl_time_after = tbl_time_before; |
| struct mlx5e_tc_flow_parse_attr *parse_attr; |
| struct mlx5_flow_attr *attr = flow->attr; |
| const struct ip_tunnel_info *tun_info; |
| struct mlx5_esw_flow_attr *esw_attr; |
| struct mlx5e_route_entry *r; |
| struct mlx5e_route_key key; |
| unsigned short family; |
| int err = 0; |
| |
| esw_attr = attr->esw_attr; |
| parse_attr = attr->parse_attr; |
| tun_info = parse_attr->tun_info[out_index]; |
| family = ip_tunnel_info_af(tun_info); |
| |
| if (family == AF_INET) { |
| key.endpoint_ip.v4 = tun_info->key.u.ipv4.src; |
| key.ip_version = 4; |
| } else if (family == AF_INET6) { |
| key.endpoint_ip.v6 = tun_info->key.u.ipv6.src; |
| key.ip_version = 6; |
| } |
| |
| err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev, |
| e->route_dev_ifindex, out_index); |
| if (err || !(esw_attr->dests[out_index].flags & |
| MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)) |
| return err; |
| |
| r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index], |
| &tbl_time_after); |
| if (IS_ERR(r)) |
| return PTR_ERR(r); |
	/* Routing changed concurrently. The FIB event handler might have
	 * missed the new entry, so schedule an update.
	 */
| if (tbl_time_before != tbl_time_after) { |
| err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE); |
| if (err) { |
| mlx5e_route_put_locked(priv, r); |
| return err; |
| } |
| } |
| |
| flow->encap_routes[out_index].r = r; |
| if (new_encap_entry) |
| list_add(&e->route_list, &r->encap_entries); |
| flow->encap_routes[out_index].index = out_index; |
| return 0; |
| } |
| |
| void mlx5e_detach_decap_route(struct mlx5e_priv *priv, |
| struct mlx5e_tc_flow *flow) |
| { |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| struct mlx5e_route_entry *r = flow->decap_route; |
| |
| if (!r) |
| return; |
| |
| mutex_lock(&esw->offloads.encap_tbl_lock); |
| list_del(&flow->decap_routes); |
| flow->decap_route = NULL; |
| |
| if (!refcount_dec_and_test(&r->refcnt)) { |
| mutex_unlock(&esw->offloads.encap_tbl_lock); |
| return; |
| } |
| hash_del_rcu(&r->hlist); |
| mutex_unlock(&esw->offloads.encap_tbl_lock); |
| |
| mlx5e_route_dealloc(priv, r); |
| } |
| |
| static void mlx5e_detach_encap_route(struct mlx5e_priv *priv, |
| struct mlx5e_tc_flow *flow, |
| int out_index) |
| { |
| struct mlx5e_route_entry *r = flow->encap_routes[out_index].r; |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| struct mlx5e_encap_entry *e, *tmp; |
| |
| if (!r) |
| return; |
| |
| mutex_lock(&esw->offloads.encap_tbl_lock); |
| flow->encap_routes[out_index].r = NULL; |
| |
| if (!refcount_dec_and_test(&r->refcnt)) { |
| mutex_unlock(&esw->offloads.encap_tbl_lock); |
| return; |
| } |
| list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list) |
| list_del_init(&e->route_list); |
| hash_del_rcu(&r->hlist); |
| mutex_unlock(&esw->offloads.encap_tbl_lock); |
| |
| mlx5e_route_dealloc(priv, r); |
| } |
| |
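/* A route change made the encap entry unreachable: unoffload all of its flows,
 * release their modify header contexts, clear the encap valid bit and drop the
 * packet reformat context. The entry is marked NO_ROUTE so neigh updates skip
 * it until the route is re-established.
 */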
| static void mlx5e_invalidate_encap(struct mlx5e_priv *priv, |
| struct mlx5e_encap_entry *e, |
| struct list_head *encap_flows) |
| { |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| struct mlx5e_tc_flow *flow; |
| |
| list_for_each_entry(flow, encap_flows, tmp_list) { |
| struct mlx5_flow_attr *attr = flow->attr; |
| struct mlx5_esw_flow_attr *esw_attr; |
| |
| if (!mlx5e_is_offloaded_flow(flow)) |
| continue; |
| esw_attr = attr->esw_attr; |
| |
| if (flow_flag_test(flow, SLOW)) |
| mlx5e_tc_unoffload_from_slow_path(esw, flow); |
| else |
| mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr); |
| mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr); |
| attr->modify_hdr = NULL; |
| |
| esw_attr->dests[flow->tmp_entry_index].flags &= |
| ~MLX5_ESW_DEST_ENCAP_VALID; |
| esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL; |
| } |
| |
| e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE; |
| if (e->flags & MLX5_ENCAP_ENTRY_VALID) { |
| e->flags &= ~MLX5_ENCAP_ENTRY_VALID; |
| mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); |
| e->pkt_reformat = NULL; |
| } |
| } |
| |
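/* A route change moved the encap entry to a new tunnel device: rebuild the
 * encapsulation header, refresh the source port rewrite metadata of each flow
 * and re-offload the flows, falling back to the slow path when the neighbour
 * is not yet resolved.
 */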
| static void mlx5e_reoffload_encap(struct mlx5e_priv *priv, |
| struct net_device *tunnel_dev, |
| struct mlx5e_encap_entry *e, |
| struct list_head *encap_flows) |
| { |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| struct mlx5e_tc_flow *flow; |
| int err; |
| |
| err = ip_tunnel_info_af(e->tun_info) == AF_INET ? |
| mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) : |
| mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e); |
| if (err) |
| mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err); |
| e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE; |
| |
| list_for_each_entry(flow, encap_flows, tmp_list) { |
| struct mlx5e_tc_flow_parse_attr *parse_attr; |
| struct mlx5_flow_attr *attr = flow->attr; |
| struct mlx5_esw_flow_attr *esw_attr; |
| struct mlx5_flow_handle *rule; |
| struct mlx5_flow_spec *spec; |
| |
| if (flow_flag_test(flow, FAILED)) |
| continue; |
| |
| esw_attr = attr->esw_attr; |
| parse_attr = attr->parse_attr; |
| spec = &parse_attr->spec; |
| |
| err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts, |
| e->out_dev, e->route_dev_ifindex, |
| flow->tmp_entry_index); |
| if (err) { |
| mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err); |
| continue; |
| } |
| |
| err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow); |
| if (err) { |
| mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d", |
| err); |
| continue; |
| } |
| |
| if (e->flags & MLX5_ENCAP_ENTRY_VALID) { |
| esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat; |
| esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; |
| if (!mlx5e_tc_flow_all_encaps_valid(esw_attr)) |
| goto offload_to_slow_path; |
| /* update from slow path rule to encap rule */ |
| rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr); |
| if (IS_ERR(rule)) { |
| err = PTR_ERR(rule); |
| mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", |
| err); |
| } else { |
| flow->rule[0] = rule; |
| } |
| } else { |
| offload_to_slow_path: |
| rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); |
| /* mark the flow's encap dest as non-valid */ |
| esw_attr->dests[flow->tmp_entry_index].flags &= |
| ~MLX5_ESW_DEST_ENCAP_VALID; |
| |
| if (IS_ERR(rule)) { |
| err = PTR_ERR(rule); |
| mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n", |
| err); |
| } else { |
| flow->rule[0] = rule; |
| } |
| } |
| flow_flag_set(flow, OFFLOADED); |
| } |
| } |
| |
| static int mlx5e_update_route_encaps(struct mlx5e_priv *priv, |
| struct mlx5e_route_entry *r, |
| struct list_head *flow_list, |
| bool replace) |
| { |
| struct net_device *tunnel_dev; |
| struct mlx5e_encap_entry *e; |
| |
| tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index); |
| if (!tunnel_dev) |
| return -ENODEV; |
| |
| list_for_each_entry(e, &r->encap_entries, route_list) { |
| LIST_HEAD(encap_flows); |
| |
| mlx5e_take_all_encap_flows(e, &encap_flows); |
| if (list_empty(&encap_flows)) |
| continue; |
| |
| if (mlx5e_route_entry_valid(r)) |
| mlx5e_invalidate_encap(priv, e, &encap_flows); |
| |
| if (!replace) { |
| list_splice(&encap_flows, flow_list); |
| continue; |
| } |
| |
| mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows); |
| list_splice(&encap_flows, flow_list); |
| } |
| |
| return 0; |
| } |
| |
| static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv, |
| struct list_head *flow_list) |
| { |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| struct mlx5e_tc_flow *flow; |
| |
| list_for_each_entry(flow, flow_list, tmp_list) |
| if (mlx5e_is_offloaded_flow(flow)) |
| mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr); |
| } |
| |
| static void mlx5e_reoffload_decap(struct mlx5e_priv *priv, |
| struct list_head *decap_flows) |
| { |
| struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; |
| struct mlx5e_tc_flow *flow; |
| |
| list_for_each_entry(flow, decap_flows, tmp_list) { |
| struct mlx5e_tc_flow_parse_attr *parse_attr; |
| struct mlx5_flow_attr *attr = flow->attr; |
| struct mlx5_flow_handle *rule; |
| struct mlx5_flow_spec *spec; |
| int err; |
| |
| if (flow_flag_test(flow, FAILED)) |
| continue; |
| |
| parse_attr = attr->parse_attr; |
| spec = &parse_attr->spec; |
| err = mlx5e_tc_tun_route_lookup(priv, spec, attr); |
| if (err) { |
| mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n", |
| err); |
| continue; |
| } |
| |
| rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr); |
| if (IS_ERR(rule)) { |
| err = PTR_ERR(rule); |
| mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n", |
| err); |
| } else { |
| flow->rule[0] = rule; |
| flow_flag_set(flow, OFFLOADED); |
| } |
| } |
| } |
| |
| static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv, |
| struct mlx5e_route_entry *r, |
| struct list_head *flow_list, |
| bool replace) |
| { |
| struct net_device *tunnel_dev; |
| LIST_HEAD(decap_flows); |
| |
| tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index); |
| if (!tunnel_dev) |
| return -ENODEV; |
| |
| mlx5e_take_all_route_decap_flows(r, &decap_flows); |
| if (mlx5e_route_entry_valid(r)) |
| mlx5e_unoffload_flow_list(priv, &decap_flows); |
| if (replace) |
| mlx5e_reoffload_decap(priv, &decap_flows); |
| |
| list_splice(&decap_flows, flow_list); |
| |
| return 0; |
| } |
| |
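/* Work handler for deferred FIB events. Takes RTNL and encap_tbl_lock to
 * synchronize with concurrent neigh updates and flow insertion, then
 * re-offloads (on route replace) or unoffloads (on route delete) all encap and
 * decap flows attached to the route entry.
 */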
| static void mlx5e_tc_fib_event_work(struct work_struct *work) |
| { |
| struct mlx5e_tc_fib_event_data *event_data = |
| container_of(work, struct mlx5e_tc_fib_event_data, work); |
| struct net_device *ul_dev = event_data->ul_dev; |
| struct mlx5e_priv *priv = netdev_priv(ul_dev); |
| struct mlx5e_route_entry *r = event_data->r; |
| struct mlx5_eswitch *esw; |
| LIST_HEAD(flow_list); |
| bool replace; |
| int err; |
| |
| /* sync with concurrent neigh updates */ |
| rtnl_lock(); |
| esw = priv->mdev->priv.eswitch; |
| mutex_lock(&esw->offloads.encap_tbl_lock); |
| replace = event_data->event == FIB_EVENT_ENTRY_REPLACE; |
| |
| if (!mlx5e_route_entry_valid(r) && !replace) |
| goto out; |
| |
| err = mlx5e_update_route_encaps(priv, r, &flow_list, replace); |
| if (err) |
| mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n", |
| err); |
| |
| err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace); |
| if (err) |
| mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n", |
| err); |
| |
| if (replace) |
| r->flags |= MLX5E_ROUTE_ENTRY_VALID; |
| out: |
| mutex_unlock(&esw->offloads.encap_tbl_lock); |
| rtnl_unlock(); |
| |
| mlx5e_put_flow_list(priv, &flow_list); |
| mlx5e_route_put(priv, event_data->r); |
| dev_put(event_data->ul_dev); |
| kfree(event_data); |
| } |
| |
| static struct mlx5e_tc_fib_event_data * |
| mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv, |
| struct net_device *ul_dev, |
| struct mlx5e_tc_tun_encap *encap, |
| unsigned long event, |
| struct fib_notifier_info *info) |
| { |
| struct fib_entry_notifier_info *fen_info; |
| struct mlx5e_tc_fib_event_data *fib_work; |
| struct mlx5e_route_entry *r; |
| struct mlx5e_route_key key; |
| struct net_device *fib_dev; |
| |
| fen_info = container_of(info, struct fib_entry_notifier_info, info); |
| fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; |
| if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops || |
| fen_info->dst_len != 32) |
| return NULL; |
| |
| fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC); |
| if (!fib_work) |
| return ERR_PTR(-ENOMEM); |
| |
| key.endpoint_ip.v4 = htonl(fen_info->dst); |
| key.ip_version = 4; |
| |
	/* Can't fail after this point because releasing the reference to r
	 * requires taking a sleeping mutex, which we can't do in atomic
	 * context.
	 */
| r = mlx5e_route_lookup_for_update(encap, &key); |
| if (!r) |
| goto out; |
| fib_work->r = r; |
| dev_hold(ul_dev); |
| |
| return fib_work; |
| |
| out: |
| kfree(fib_work); |
| return NULL; |
| } |
| |
| static struct mlx5e_tc_fib_event_data * |
| mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv, |
| struct net_device *ul_dev, |
| struct mlx5e_tc_tun_encap *encap, |
| unsigned long event, |
| struct fib_notifier_info *info) |
| { |
| struct fib6_entry_notifier_info *fen_info; |
| struct mlx5e_tc_fib_event_data *fib_work; |
| struct mlx5e_route_entry *r; |
| struct mlx5e_route_key key; |
| struct net_device *fib_dev; |
| |
| fen_info = container_of(info, struct fib6_entry_notifier_info, info); |
| fib_dev = fib6_info_nh_dev(fen_info->rt); |
	if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->rt->fib6_dst.plen != 128)
| return NULL; |
| |
| fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC); |
| if (!fib_work) |
| return ERR_PTR(-ENOMEM); |
| |
| memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr, |
| sizeof(fen_info->rt->fib6_dst.addr)); |
| key.ip_version = 6; |
| |
	/* Can't fail after this point because releasing the reference to r
	 * requires taking a sleeping mutex, which we can't do in atomic
	 * context.
	 */
| r = mlx5e_route_lookup_for_update(encap, &key); |
| if (!r) |
| goto out; |
| fib_work->r = r; |
| dev_hold(ul_dev); |
| |
| return fib_work; |
| |
| out: |
| kfree(fib_work); |
| return NULL; |
| } |
| |
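/* FIB notifier callback. Runs in atomic context, so it only validates the
 * event, takes the needed references and queues the actual processing to
 * mlx5e_tc_fib_event_work().
 */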
| static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr) |
| { |
| struct mlx5e_tc_fib_event_data *fib_work; |
| struct fib_notifier_info *info = ptr; |
| struct mlx5e_tc_tun_encap *encap; |
| struct net_device *ul_dev; |
| struct mlx5e_priv *priv; |
| |
| encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb); |
| priv = encap->priv; |
| ul_dev = priv->netdev; |
| priv = netdev_priv(ul_dev); |
| |
| switch (event) { |
| case FIB_EVENT_ENTRY_REPLACE: |
| case FIB_EVENT_ENTRY_DEL: |
| if (info->family == AF_INET) |
| fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info); |
| else if (info->family == AF_INET6) |
| fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info); |
| else |
| return NOTIFY_DONE; |
| |
| if (!IS_ERR_OR_NULL(fib_work)) { |
| queue_work(priv->wq, &fib_work->work); |
| } else if (IS_ERR(fib_work)) { |
| NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work"); |
| mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n", |
| PTR_ERR(fib_work)); |
| } |
| |
| break; |
| default: |
| return NOTIFY_DONE; |
| } |
| |
| return NOTIFY_DONE; |
| } |
| |
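/* Allocate the per-uplink tunnel encap context and register the FIB notifier
 * that tracks routes to tunnel endpoints.
 */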
| struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv) |
| { |
| struct mlx5e_tc_tun_encap *encap; |
| int err; |
| |
| encap = kvzalloc(sizeof(*encap), GFP_KERNEL); |
| if (!encap) |
| return ERR_PTR(-ENOMEM); |
| |
| encap->priv = priv; |
| encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event; |
| spin_lock_init(&encap->route_lock); |
| hash_init(encap->route_tbl); |
| err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb, |
| NULL, NULL); |
| if (err) { |
| kvfree(encap); |
| return ERR_PTR(err); |
| } |
| |
| return encap; |
| } |
| |
| void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap) |
| { |
| if (!encap) |
| return; |
| |
| unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb); |
	flush_workqueue(encap->priv->wq); /* flush pending fib event work */
| kvfree(encap); |
| } |