net/mlx5: Enable host PF HCA after eswitch is initialized

Currently ECPF enables external host PF too early in the initialization
sequence for Ethernet links when ECPF is eswitch manager.

Due to this, when external host PF driver is loaded, host PF's HCA CAP has
inner_ip_version supported by NIC RX flow table.
This capability is later updated by firmware after ECPF driver enables
ENCAP/DECAP as eswitch manager.

This results into a timing race condition, where CREATE_TIR command
fails with a below syndrome on host PF.

mlx5_cmd_check:775:(pid 510): CREATE_TIR(0x900) op_mod(0x0) failed,
status bad parameter(0x3), syndrome (0x562b00)

Hence, enable the external host PF after necessary eswitch and per vport
initialization is completed.
Continue to enable host PF when eswitch manager capability is off for a
ECPF.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Bodong Wang <bodong@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
index 68ca0e2..464eb3a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
@@ -8,7 +8,16 @@
 	return (ioread32be(&dev->iseg->initializing) >> MLX5_ECPU_BIT_NUM) & 1;
 }
 
-static int mlx5_cmd_host_pf_enable_hca(struct mlx5_core_dev *dev)
+static bool mlx5_ecpf_esw_admins_host_pf(const struct mlx5_core_dev *dev)
+{
+	/* In separate host mode, PF enables itself.
+	 * When ECPF is eswitch manager, eswitch enables host PF after
+	 * eswitch is setup.
+	 */
+	return mlx5_core_is_ecpf_esw_manager(dev);
+}
+
+int mlx5_cmd_host_pf_enable_hca(struct mlx5_core_dev *dev)
 {
 	u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(enable_hca_in)]   = {};
@@ -19,7 +28,7 @@
 	return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
 }
 
-static int mlx5_cmd_host_pf_disable_hca(struct mlx5_core_dev *dev)
+int mlx5_cmd_host_pf_disable_hca(struct mlx5_core_dev *dev)
 {
 	u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(disable_hca_in)]   = {};
@@ -34,6 +43,12 @@
 {
 	int err;
 
+	if (mlx5_ecpf_esw_admins_host_pf(dev))
+		return 0;
+
+	/* ECPF shall enable HCA for host PF in the same way a PF
+	 * does this for its VFs when ECPF is not a eswitch manager.
+	 */
 	err = mlx5_cmd_host_pf_enable_hca(dev);
 	if (err)
 		mlx5_core_err(dev, "Failed to enable external host PF HCA err(%d)\n", err);
@@ -45,15 +60,14 @@
 {
 	int err;
 
+	if (mlx5_ecpf_esw_admins_host_pf(dev))
+		return;
+
 	err = mlx5_cmd_host_pf_disable_hca(dev);
 	if (err) {
 		mlx5_core_err(dev, "Failed to disable external host PF HCA err(%d)\n", err);
 		return;
 	}
-
-	err = mlx5_wait_for_pages(dev, &dev->priv.host_pf_pages);
-	if (err)
-		mlx5_core_warn(dev, "Timeout reclaiming external host PF pages err(%d)\n", err);
 }
 
 int mlx5_ec_init(struct mlx5_core_dev *dev)
@@ -61,16 +75,19 @@
 	if (!mlx5_core_is_ecpf(dev))
 		return 0;
 
-	/* ECPF shall enable HCA for host PF in the same way a PF
-	 * does this for its VFs.
-	 */
 	return mlx5_host_pf_init(dev);
 }
 
 void mlx5_ec_cleanup(struct mlx5_core_dev *dev)
 {
+	int err;
+
 	if (!mlx5_core_is_ecpf(dev))
 		return;
 
 	mlx5_host_pf_cleanup(dev);
+
+	err = mlx5_wait_for_pages(dev, &dev->priv.host_pf_pages);
+	if (err)
+		mlx5_core_warn(dev, "Timeout reclaiming external host PF pages err(%d)\n", err);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
index d3d7a00..40b6ad7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
@@ -17,6 +17,9 @@
 int mlx5_ec_init(struct mlx5_core_dev *dev);
 void mlx5_ec_cleanup(struct mlx5_core_dev *dev);
 
+int mlx5_cmd_host_pf_enable_hca(struct mlx5_core_dev *dev);
+int mlx5_cmd_host_pf_disable_hca(struct mlx5_core_dev *dev);
+
 #else  /* CONFIG_MLX5_ESWITCH */
 
 static inline bool
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 6e6a9a56..dcd8946a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1469,6 +1469,26 @@
 	return err;
 }
 
+static int host_pf_enable_hca(struct mlx5_core_dev *dev)
+{
+	if (!mlx5_core_is_ecpf(dev))
+		return 0;
+
+	/* Once vport and representor are ready, take out the external host PF
+	 * out of initializing state. Enabling HCA clears the iser->initializing
+	 * bit and host PF driver loading can progress.
+	 */
+	return mlx5_cmd_host_pf_enable_hca(dev);
+}
+
+static void host_pf_disable_hca(struct mlx5_core_dev *dev)
+{
+	if (!mlx5_core_is_ecpf(dev))
+		return;
+
+	mlx5_cmd_host_pf_disable_hca(dev);
+}
+
 /* mlx5_eswitch_enable_pf_vf_vports() enables vports of PF, ECPF and VFs
  * whichever are present on the eswitch.
  */
@@ -1483,6 +1503,11 @@
 	if (ret)
 		return ret;
 
+	/* Enable external host PF HCA */
+	ret = host_pf_enable_hca(esw->dev);
+	if (ret)
+		goto pf_hca_err;
+
 	/* Enable ECPF vport */
 	if (mlx5_ecpf_vport_exists(esw->dev)) {
 		ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_ECPF, enabled_events);
@@ -1500,8 +1525,9 @@
 vf_err:
 	if (mlx5_ecpf_vport_exists(esw->dev))
 		mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF);
-
 ecpf_err:
+	host_pf_disable_hca(esw->dev);
+pf_hca_err:
 	mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF);
 	return ret;
 }
@@ -1516,6 +1542,7 @@
 	if (mlx5_ecpf_vport_exists(esw->dev))
 		mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF);
 
+	host_pf_disable_hca(esw->dev);
 	mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index a9757cc..d86f06f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1126,23 +1126,23 @@
 		goto err_sriov;
 	}
 
-	err = mlx5_sriov_attach(dev);
-	if (err) {
-		mlx5_core_err(dev, "sriov init failed %d\n", err);
-		goto err_sriov;
-	}
-
 	err = mlx5_ec_init(dev);
 	if (err) {
 		mlx5_core_err(dev, "Failed to init embedded CPU\n");
 		goto err_ec;
 	}
 
+	err = mlx5_sriov_attach(dev);
+	if (err) {
+		mlx5_core_err(dev, "sriov init failed %d\n", err);
+		goto err_sriov;
+	}
+
 	return 0;
 
-err_ec:
-	mlx5_sriov_detach(dev);
 err_sriov:
+	mlx5_ec_cleanup(dev);
+err_ec:
 	mlx5_cleanup_fs(dev);
 err_fs:
 	mlx5_accel_tls_cleanup(dev);
@@ -1168,8 +1168,8 @@
 
 static void mlx5_unload(struct mlx5_core_dev *dev)
 {
-	mlx5_ec_cleanup(dev);
 	mlx5_sriov_detach(dev);
+	mlx5_ec_cleanup(dev);
 	mlx5_cleanup_fs(dev);
 	mlx5_accel_ipsec_cleanup(dev);
 	mlx5_accel_tls_cleanup(dev);