Merge tag 'spi-v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi

Pull spi updates from Mark Brown:
 "There's quite a lot of changes for SPI in this release but none in the
  core, they're all mostly small driver updates and additions. Some of
  the more notable changes include:

   - A huge set of cleanups, optimizations and improvements for the
     DesignWare driver from Serge Semin finishing up the work started
     last release.

   - Conversion of the Zynq gqspi driver to spi-mem.

   - Support for Baikal T1, Broadcom BCMSTB 7445, and Renesas R8A7742"

* tag 'spi-v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi: (137 commits)
  spi: cadence: Add SPI transfer delays
  spi: dw: Add Baikal-T1 SPI Controller bindings
  spi: dw: Add Baikal-T1 SPI Controller glue driver
  spi: dw: Add poll-based SPI transfers support
  spi: dw: Introduce max mem-ops SPI bus frequency setting
  spi: dw: Add memory operations support
  spi: dw: Add generic DW SSI status-check method
  spi: dw: Move num-of retries parameter to the header file
  spi: dw: Explicitly de-assert CS on SPI transfer completion
  spi: dw: De-assert chip-select on reset
  spi: dw: Discard chip enabling on DMA setup error
  spi: dw: Unmask IRQs after enabling the chip
  spi: dw: Perform IRQ setup in a dedicated function
  spi: dw: Refactor IRQ-based SPI transfer procedure
  spi: dw: Refactor data IO procedure
  spi: dw: Add DW SPI controller config structure
  spi: dw: Update Rx sample delay in the config function
  spi: dw: Simplify the SPI bus speed config procedure
  spi: dw: Update SPI bus speed in a config function
  spi: dw: Detach SPI device specific CR0 config method
  ...
diff --git a/Documentation/devicetree/bindings/fsi/ibm,fsi2spi.yaml b/Documentation/devicetree/bindings/fsi/ibm,fsi2spi.yaml
index b26d4b4..fe39ea4 100644
--- a/Documentation/devicetree/bindings/fsi/ibm,fsi2spi.yaml
+++ b/Documentation/devicetree/bindings/fsi/ibm,fsi2spi.yaml
@@ -19,6 +19,7 @@
   compatible:
     enum:
       - ibm,fsi2spi
+      - ibm,fsi2spi-restricted
 
   reg:
     items:
diff --git a/Documentation/devicetree/bindings/spi/brcm,spi-bcm-qspi.txt b/Documentation/devicetree/bindings/spi/brcm,spi-bcm-qspi.txt
index 62d4ed2..d99a9cf 100644
--- a/Documentation/devicetree/bindings/spi/brcm,spi-bcm-qspi.txt
+++ b/Documentation/devicetree/bindings/spi/brcm,spi-bcm-qspi.txt
@@ -32,6 +32,8 @@
     			     			  			    BRCMSTB  SoCs
     "brcm,spi-bcm7435-qspi", "brcm,spi-bcm-qspi", "brcm,spi-brcmstb-mspi" : Second Instance of MSPI
     			     			  			    BRCMSTB  SoCs
+    "brcm,spi-bcm7445-qspi", "brcm,spi-bcm-qspi", "brcm,spi-brcmstb-mspi" : Second Instance of MSPI
+                                                                            BRCMSTB  SoCs
     "brcm,spi-bcm7216-qspi", "brcm,spi-bcm-qspi", "brcm,spi-brcmstb-mspi" : Second Instance of MSPI
     			     			  			    BRCMSTB  SoCs
     "brcm,spi-bcm7278-qspi", "brcm,spi-bcm-qspi", "brcm,spi-brcmstb-mspi" : Second Instance of MSPI
diff --git a/Documentation/devicetree/bindings/spi/mediatek,spi-mtk-nor.yaml b/Documentation/devicetree/bindings/spi/mediatek,spi-mtk-nor.yaml
new file mode 100644
index 0000000..55c2394
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/mediatek,spi-mtk-nor.yaml
@@ -0,0 +1,86 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/mediatek,spi-mtk-nor.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Serial NOR flash controller for MediaTek ARM SoCs
+
+maintainers:
+  - Bayi Cheng <bayi.cheng@mediatek.com>
+  - Chuanhong Guo <gch981213@gmail.com>
+
+description: |
+  This spi controller support single, dual, or quad mode transfer for
+  SPI NOR flash. There should be only one spi slave device following
+  generic spi bindings. It's not recommended to use this controller
+  for devices other than SPI NOR flash due to limited transfer
+  capability of this controller.
+
+allOf:
+  - $ref: /spi/spi-controller.yaml#
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+          - enum:
+              - mediatek,mt2701-nor
+              - mediatek,mt2712-nor
+              - mediatek,mt7622-nor
+              - mediatek,mt7623-nor
+              - mediatek,mt7629-nor
+              - mediatek,mt8192-nor
+          - enum:
+              - mediatek,mt8173-nor
+      - items:
+          - const: mediatek,mt8173-nor
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: clock used for spi bus
+      - description: clock used for controller
+
+  clock-names:
+    items:
+      - const: spi
+      - const: sf
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/mt8173-clk.h>
+
+    soc {
+      #address-cells = <2>;
+      #size-cells = <2>;
+
+      nor_flash: spi@1100d000 {
+        compatible = "mediatek,mt8173-nor";
+        reg = <0 0x1100d000 0 0xe0>;
+        interrupts = <&spi_flash_irq>;
+        clocks = <&pericfg CLK_PERI_SPI>, <&topckgen CLK_TOP_SPINFI_IFR_SEL>;
+        clock-names = "spi", "sf";
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        flash@0 {
+          compatible = "jedec,spi-nor";
+          reg = <0>;
+        };
+      };
+    };
+
diff --git a/Documentation/devicetree/bindings/spi/renesas,rspi.yaml b/Documentation/devicetree/bindings/spi/renesas,rspi.yaml
index c54ac05..0d201ce 100644
--- a/Documentation/devicetree/bindings/spi/renesas,rspi.yaml
+++ b/Documentation/devicetree/bindings/spi/renesas,rspi.yaml
@@ -25,6 +25,7 @@
 
       - items:
           - enum:
+              - renesas,qspi-r8a7742   # RZ/G1H
               - renesas,qspi-r8a7743   # RZ/G1M
               - renesas,qspi-r8a7744   # RZ/G1N
               - renesas,qspi-r8a7745   # RZ/G1E
diff --git a/Documentation/devicetree/bindings/spi/renesas,sh-msiof.yaml b/Documentation/devicetree/bindings/spi/renesas,sh-msiof.yaml
index 9f7b118..3d3b60e 100644
--- a/Documentation/devicetree/bindings/spi/renesas,sh-msiof.yaml
+++ b/Documentation/devicetree/bindings/spi/renesas,sh-msiof.yaml
@@ -41,6 +41,7 @@
               - renesas,msiof-r8a774e1      # RZ/G2H
               - renesas,msiof-r8a7795       # R-Car H3
               - renesas,msiof-r8a7796       # R-Car M3-W
+              - renesas,msiof-r8a77961      # R-Car M3-W+
               - renesas,msiof-r8a77965      # R-Car M3-N
               - renesas,msiof-r8a77970      # R-Car V3M
               - renesas,msiof-r8a77980      # R-Car V3H
diff --git a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml
index c62cbe7..99ed9b4 100644
--- a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml
+++ b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml
@@ -22,6 +22,21 @@
       properties:
         reg:
           minItems: 2
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - baikal,bt1-sys-ssi
+    then:
+      properties:
+        mux-controls:
+          maxItems: 1
+      required:
+        - mux-controls
+    else:
+      required:
+        - interrupts
 
 properties:
   compatible:
@@ -36,6 +51,8 @@
               - mscc,ocelot-spi
               - mscc,jaguar2-spi
           - const: snps,dw-apb-ssi
+      - description: Microchip Sparx5 SoC SPI Controller
+        const: microchip,sparx5-spi
       - description: Amazon Alpine SPI Controller
         const: amazon,alpine-dw-apb-ssi
       - description: Renesas RZ/N1 SPI Controller
@@ -44,12 +61,16 @@
           - const: snps,dw-apb-ssi
       - description: Intel Keem Bay SPI Controller
         const: intel,keembay-ssi
+      - description: Baikal-T1 SPI Controller
+        const: baikal,bt1-ssi
+      - description: Baikal-T1 System Boot SPI Controller
+        const: baikal,bt1-sys-ssi
 
   reg:
     minItems: 1
     items:
       - description: DW APB SSI controller memory mapped registers
-      - description: SPI MST region map
+      - description: SPI MST region map or directly mapped SPI ROM
 
   interrupts:
     maxItems: 1
@@ -93,6 +114,12 @@
       - const: tx
       - const: rx
 
+  rx-sample-delay-ns:
+    default: 0
+    description: Default value of the rx-sample-delay-ns property.
+      This value will be used if the property is not explicitly defined
+      for a SPI slave device. See below.
+
 patternProperties:
   "^.*@[0-9a-f]+$":
     type: object
@@ -107,6 +134,13 @@
       spi-tx-bus-width:
         const: 1
 
+      rx-sample-delay-ns:
+        description: SPI Rx sample delay offset, unit is nanoseconds.
+          The delay from the default sample time before the actual
+          sample of the rxd input signal occurs. The "rx_sample_delay"
+          is an optional feature of the designware controller, and the
+          upper limit is also subject to controller configuration.
+
 unevaluatedProperties: false
 
 required:
@@ -114,7 +148,6 @@
   - reg
   - "#address-cells"
   - "#size-cells"
-  - interrupts
   - clocks
 
 examples:
@@ -129,5 +162,22 @@
       num-cs = <2>;
       cs-gpios = <&gpio0 13 0>,
                  <&gpio0 14 0>;
+      rx-sample-delay-ns = <3>;
+      spi-flash@1 {
+        compatible = "spi-nand";
+        reg = <1>;
+        rx-sample-delay-ns = <7>;
+      };
+    };
+  - |
+    spi@1f040100 {
+      compatible = "baikal,bt1-sys-ssi";
+      reg = <0x1f040100 0x900>,
+            <0x1c000000 0x1000000>;
+      #address-cells = <1>;
+      #size-cells = <0>;
+      mux-controls = <&boot_mux>;
+      clocks = <&ccu_sys>;
+      clock-names = "ssi_clk";
     };
 ...
diff --git a/Documentation/devicetree/bindings/spi/spi-mtk-nor.txt b/Documentation/devicetree/bindings/spi/spi-mtk-nor.txt
deleted file mode 100644
index 984ae7f..0000000
--- a/Documentation/devicetree/bindings/spi/spi-mtk-nor.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-* Serial NOR flash controller for MediaTek ARM SoCs
-
-Required properties:
-- compatible: 	  For mt8173, compatible should be "mediatek,mt8173-nor",
-		  and it's the fallback compatible for other Soc.
-		  For every other SoC, should contain both the SoC-specific compatible
-		  string and "mediatek,mt8173-nor".
-		  The possible values are:
-		  "mediatek,mt2701-nor", "mediatek,mt8173-nor"
-		  "mediatek,mt2712-nor", "mediatek,mt8173-nor"
-		  "mediatek,mt7622-nor", "mediatek,mt8173-nor"
-		  "mediatek,mt7623-nor", "mediatek,mt8173-nor"
-		  "mediatek,mt7629-nor", "mediatek,mt8173-nor"
-		  "mediatek,mt8173-nor"
-- reg: 		  physical base address and length of the controller's register
-- interrupts:	  Interrupt number used by the controller.
-- clocks: 	  the phandle of the clocks needed by the nor controller
-- clock-names: 	  the names of the clocks
-		  the clocks should be named "spi" and "sf". "spi" is used for spi bus,
-		  and "sf" is used for controller, these are the clocks witch
-		  hardware needs to enabling nor flash and nor flash controller.
-		  See Documentation/devicetree/bindings/clock/clock-bindings.txt for details.
-- #address-cells: should be <1>
-- #size-cells:	  should be <0>
-
-There should be only one spi slave device following generic spi bindings.
-It's not recommended to use this controller for devices other than SPI NOR
-flash due to limited transfer capability of this controller.
-
-Example:
-
-nor_flash: spi@1100d000 {
-	compatible = "mediatek,mt8173-nor";
-	reg = <0 0x1100d000 0 0xe0>;
-	interrupts = <&spi_flash_irq>;
-	clocks = <&pericfg CLK_PERI_SPI>,
-		 <&topckgen CLK_TOP_SPINFI_IFR_SEL>;
-	clock-names = "spi", "sf";
-	#address-cells = <1>;
-	#size-cells = <0>;
-
-	flash@0 {
-		compatible = "jedec,spi-nor";
-		reg = <0>;
-	};
-};
-
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index c6ea760..d2c976e 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -235,6 +235,7 @@
 
 config SPI_DESIGNWARE
 	tristate "DesignWare SPI controller core support"
+	imply SPI_MEM
 	help
 	  general driver for SPI controller core from DesignWare
 
@@ -251,6 +252,34 @@
 	tristate "Memory-mapped io interface driver for DW SPI core"
 	depends on HAS_IOMEM
 
+config SPI_DW_BT1
+	tristate "Baikal-T1 SPI driver for DW SPI core"
+	depends on MIPS_BAIKAL_T1 || COMPILE_TEST
+	help
+	  Baikal-T1 SoC is equipped with three DW APB SSI-based MMIO SPI
+	  controllers. Two of them are pretty much normal: with IRQ, DMA,
+	  FIFOs of 64 words depth, 4x CSs, but the third one as being a
+	  part of the Baikal-T1 System Boot Controller has got a very
+	  limited resources: no IRQ, no DMA, only a single native
+	  chip-select and Tx/Rx FIFO with just 8 words depth available.
+	  The later one is normally connected to an external SPI-nor flash
+	  of 128Mb (in general can be of bigger size).
+
+config SPI_DW_BT1_DIRMAP
+	bool "Directly mapped Baikal-T1 Boot SPI flash support"
+	depends on SPI_DW_BT1
+	select MULTIPLEXER
+	select MUX_MMIO
+	help
+	  Directly mapped SPI flash memory is an interface specific to the
+	  Baikal-T1 System Boot Controller. It is a 16MB MMIO region, which
+	  can be used to access a peripheral memory device just by
+	  reading/writing data from/to it. Note that the system APB bus
+	  will stall during each IO from/to the dirmap region until the
+	  operation is finished. So try not to use it concurrently with
+	  time-critical tasks (like the SPI memory operations implemented
+	  in this driver).
+
 endif
 
 config SPI_DLN2
@@ -637,7 +666,7 @@
 
 config SPI_QUP
 	tristate "Qualcomm SPI controller with QUP interface"
-	depends on ARCH_QCOM || (ARM && COMPILE_TEST)
+	depends on ARCH_QCOM || COMPILE_TEST
 	help
 	  Qualcomm Universal Peripheral (QUP) core is an AHB slave that
 	  provides a common data path (an output FIFO and an input FIFO)
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index cf955ea..21dc758 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -39,6 +39,7 @@
 obj-$(CONFIG_SPI_DESIGNWARE)		+= spi-dw.o
 spi-dw-y				:= spi-dw-core.o
 spi-dw-$(CONFIG_SPI_DW_DMA)		+= spi-dw-dma.o
+obj-$(CONFIG_SPI_DW_BT1)		+= spi-dw-bt1.o
 obj-$(CONFIG_SPI_DW_MMIO)		+= spi-dw-mmio.o
 obj-$(CONFIG_SPI_DW_PCI)		+= spi-dw-pci.o
 obj-$(CONFIG_SPI_EFM32)			+= spi-efm32.o
diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c
index fcde419..46feafe 100644
--- a/drivers/spi/spi-armada-3700.c
+++ b/drivers/spi/spi-armada-3700.c
@@ -848,7 +848,6 @@
 	platform_set_drvdata(pdev, master);
 
 	spi = spi_master_get_devdata(master);
-	memset(spi, 0, sizeof(struct a3700_spi));
 
 	spi->master = master;
 
diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c
index 2cfe625..ce5bd06 100644
--- a/drivers/spi/spi-atmel.c
+++ b/drivers/spi/spi-atmel.c
@@ -513,9 +513,8 @@
 
 	master->dma_tx = dma_request_chan(dev, "tx");
 	if (IS_ERR(master->dma_tx)) {
-		err = PTR_ERR(master->dma_tx);
-		if (err != -EPROBE_DEFER)
-			dev_err(dev, "No TX DMA channel, DMA is disabled\n");
+		err = dev_err_probe(dev, PTR_ERR(master->dma_tx),
+				    "No TX DMA channel, DMA is disabled\n");
 		goto error_clear;
 	}
 
@@ -859,6 +858,7 @@
 	csr = spi_readl(as, CSR0 + 4 * chip_select);
 	csr = SPI_BFINS(SCBR, scbr, csr);
 	spi_writel(as, CSR0 + 4 * chip_select, csr);
+	xfer->effective_speed_hz = bus_hz / scbr;
 
 	return 0;
 }
diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c
index 9cfa15e..14c9d01 100644
--- a/drivers/spi/spi-bcm-qspi.c
+++ b/drivers/spi/spi-bcm-qspi.c
@@ -1282,16 +1282,9 @@
 
 static const struct of_device_id bcm_qspi_of_match[] = {
 	{
-		.compatible = "brcm,spi-bcm7425-qspi",
-		.data = &bcm_qspi_no_rev_data,
-	},
-	{
-		.compatible = "brcm,spi-bcm7429-qspi",
-		.data = &bcm_qspi_no_rev_data,
-	},
-	{
-		.compatible = "brcm,spi-bcm7435-qspi",
-		.data = &bcm_qspi_no_rev_data,
+		.compatible = "brcm,spi-bcm7445-qspi",
+		.data = &bcm_qspi_rev_data,
+
 	},
 	{
 		.compatible = "brcm,spi-bcm-qspi",
diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c
index 41986ac..b87116e 100644
--- a/drivers/spi/spi-bcm2835.c
+++ b/drivers/spi/spi-bcm2835.c
@@ -1319,11 +1319,8 @@
 
 	bs->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(bs->clk)) {
-		err = PTR_ERR(bs->clk);
-		if (err == -EPROBE_DEFER)
-			dev_dbg(&pdev->dev, "could not get clk: %d\n", err);
-		else
-			dev_err(&pdev->dev, "could not get clk: %d\n", err);
+		err = dev_err_probe(&pdev->dev, PTR_ERR(bs->clk),
+				    "could not get clk\n");
 		goto out_controller_put;
 	}
 
diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
index c6795c6..40938cf 100644
--- a/drivers/spi/spi-cadence-quadspi.c
+++ b/drivers/spi/spi-cadence-quadspi.c
@@ -1119,11 +1119,8 @@
 	cqspi->rx_chan = dma_request_chan_by_mask(&mask);
 	if (IS_ERR(cqspi->rx_chan)) {
 		int ret = PTR_ERR(cqspi->rx_chan);
-
-		if (ret != -EPROBE_DEFER)
-			dev_err(&cqspi->pdev->dev, "No Rx DMA available\n");
 		cqspi->rx_chan = NULL;
-		return ret;
+		return dev_err_probe(&cqspi->pdev->dev, ret, "No Rx DMA available\n");
 	}
 	init_completion(&cqspi->rx_dma_complete);
 
diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c
index 2b6b9c1..70467b9 100644
--- a/drivers/spi/spi-cadence.c
+++ b/drivers/spi/spi-cadence.c
@@ -418,8 +418,8 @@
 	xspi->rx_bytes = transfer->len;
 
 	cdns_spi_setup_transfer(spi, transfer);
-
 	cdns_spi_fill_tx_fifo(xspi);
+	spi_transfer_delay_exec(transfer);
 
 	cdns_spi_write(xspi, CDNS_SPI_IER, CDNS_SPI_IXR_DEFAULT);
 	return transfer->len;
diff --git a/drivers/spi/spi-dw-bt1.c b/drivers/spi/spi-dw-bt1.c
new file mode 100644
index 0000000..f382dfad
--- /dev/null
+++ b/drivers/spi/spi-dw-bt1.c
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright (C) 2020 BAIKAL ELECTRONICS, JSC
+//
+// Authors:
+//   Ramil Zaripov <Ramil.Zaripov@baikalelectronics.ru>
+//   Serge Semin <Sergey.Semin@baikalelectronics.ru>
+//
+// Baikal-T1 DW APB SPI and System Boot SPI driver
+//
+
+#include <linux/clk.h>
+#include <linux/cpumask.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/mux/consumer.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/property.h>
+#include <linux/slab.h>
+#include <linux/spi/spi-mem.h>
+#include <linux/spi/spi.h>
+
+#include "spi-dw.h"
+
+#define BT1_BOOT_DIRMAP		0
+#define BT1_BOOT_REGS		1
+
+struct dw_spi_bt1 {
+	struct dw_spi		dws;
+	struct clk		*clk;
+	struct mux_control	*mux;
+
+#ifdef CONFIG_SPI_DW_BT1_DIRMAP
+	void __iomem		*map;
+	resource_size_t		map_len;
+#endif
+};
+#define to_dw_spi_bt1(_ctlr) \
+	container_of(spi_controller_get_devdata(_ctlr), struct dw_spi_bt1, dws)
+
+typedef int (*dw_spi_bt1_init_cb)(struct platform_device *pdev,
+				    struct dw_spi_bt1 *dwsbt1);
+
+#ifdef CONFIG_SPI_DW_BT1_DIRMAP
+
+static int dw_spi_bt1_dirmap_create(struct spi_mem_dirmap_desc *desc)
+{
+	struct dw_spi_bt1 *dwsbt1 = to_dw_spi_bt1(desc->mem->spi->controller);
+
+	if (!dwsbt1->map ||
+	    !dwsbt1->dws.mem_ops.supports_op(desc->mem, &desc->info.op_tmpl))
+		return -EOPNOTSUPP;
+
+	/*
+	 * Make sure the requested region doesn't go out of the physically
+	 * mapped flash memory bounds and the operation is read-only.
+	 */
+	if (desc->info.offset + desc->info.length > dwsbt1->map_len ||
+	    desc->info.op_tmpl.data.dir != SPI_MEM_DATA_IN)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+/*
+ * Directly mapped SPI memory region is only accessible in the dword chunks.
+ * That's why we have to create a dedicated read-method to copy data from there
+ * to the passed buffer.
+ */
+static void dw_spi_bt1_dirmap_copy_from_map(void *to, void __iomem *from, size_t len)
+{
+	size_t shift, chunk;
+	u32 data;
+
+	/*
+	 * We split the copying up into the next three stages: unaligned head,
+	 * aligned body, unaligned tail.
+	 */
+	shift = (size_t)from & 0x3;
+	if (shift) {
+		chunk = min_t(size_t, 4 - shift, len);
+		data = readl_relaxed(from - shift);
+		memcpy(to, &data + shift, chunk);
+		from += chunk;
+		to += chunk;
+		len -= chunk;
+	}
+
+	while (len >= 4) {
+		data = readl_relaxed(from);
+		memcpy(to, &data, 4);
+		from += 4;
+		to += 4;
+		len -= 4;
+	}
+
+	if (len) {
+		data = readl_relaxed(from);
+		memcpy(to, &data, len);
+	}
+}
+
+static ssize_t dw_spi_bt1_dirmap_read(struct spi_mem_dirmap_desc *desc,
+				      u64 offs, size_t len, void *buf)
+{
+	struct dw_spi_bt1 *dwsbt1 = to_dw_spi_bt1(desc->mem->spi->controller);
+	struct dw_spi *dws = &dwsbt1->dws;
+	struct spi_mem *mem = desc->mem;
+	struct dw_spi_cfg cfg;
+	int ret;
+
+	/*
+	 * Make sure the requested operation length is valid. Truncate the
+	 * length if it's greater than the length of the MMIO region.
+	 */
+	if (offs >= dwsbt1->map_len || !len)
+		return 0;
+
+	len = min_t(size_t, len, dwsbt1->map_len - offs);
+
+	/* Collect the controller configuration required by the operation */
+	cfg.tmode = SPI_TMOD_EPROMREAD;
+	cfg.dfs = 8;
+	cfg.ndf = 4;
+	cfg.freq = mem->spi->max_speed_hz;
+
+	/* Make sure the corresponding CS is de-asserted on transmission */
+	dw_spi_set_cs(mem->spi, false);
+
+	spi_enable_chip(dws, 0);
+
+	dw_spi_update_config(dws, mem->spi, &cfg);
+
+	spi_umask_intr(dws, SPI_INT_RXFI);
+
+	spi_enable_chip(dws, 1);
+
+	/*
+	 * Enable the transparent mode of the System Boot Controller.
+	 * The SPI core IO should have been locked before calling this method
+	 * so noone would be touching the controller' registers during the
+	 * dirmap operation.
+	 */
+	ret = mux_control_select(dwsbt1->mux, BT1_BOOT_DIRMAP);
+	if (ret)
+		return ret;
+
+	dw_spi_bt1_dirmap_copy_from_map(buf, dwsbt1->map + offs, len);
+
+	mux_control_deselect(dwsbt1->mux);
+
+	dw_spi_set_cs(mem->spi, true);
+
+	ret = dw_spi_check_status(dws, true);
+
+	return ret ?: len;
+}
+
+#endif /* CONFIG_SPI_DW_BT1_DIRMAP */
+
+static int dw_spi_bt1_std_init(struct platform_device *pdev,
+			       struct dw_spi_bt1 *dwsbt1)
+{
+	struct dw_spi *dws = &dwsbt1->dws;
+
+	dws->irq = platform_get_irq(pdev, 0);
+	if (dws->irq < 0)
+		return dws->irq;
+
+	dws->num_cs = 4;
+
+	/*
+	 * Baikal-T1 Normal SPI Controllers don't always keep up with full SPI
+	 * bus speed especially when it comes to the concurrent access to the
+	 * APB bus resources. Thus we have no choice but to set a constraint on
+	 * the SPI bus frequency for the memory operations which require to
+	 * read/write data as fast as possible.
+	 */
+	dws->max_mem_freq = 20000000U;
+
+	dw_spi_dma_setup_generic(dws);
+
+	return 0;
+}
+
+static int dw_spi_bt1_sys_init(struct platform_device *pdev,
+			       struct dw_spi_bt1 *dwsbt1)
+{
+	struct resource *mem __maybe_unused;
+	struct dw_spi *dws = &dwsbt1->dws;
+
+	/*
+	 * Baikal-T1 System Boot Controller is equipped with a mux, which
+	 * switches between the directly mapped SPI flash access mode and
+	 * IO access to the DW APB SSI registers. Note the mux controller
+	 * must be setup to preserve the registers being accessible by default
+	 * (on idle-state).
+	 */
+	dwsbt1->mux = devm_mux_control_get(&pdev->dev, NULL);
+	if (IS_ERR(dwsbt1->mux))
+		return PTR_ERR(dwsbt1->mux);
+
+	/*
+	 * Directly mapped SPI flash memory is a 16MB MMIO region, which can be
+	 * used to access a peripheral memory device just by reading/writing
+	 * data from/to it. Note the system APB bus will stall during each IO
+	 * from/to the dirmap region until the operation is finished. So don't
+	 * use it concurrently with time-critical tasks (like the SPI memory
+	 * operations implemented in the DW APB SSI driver).
+	 */
+#ifdef CONFIG_SPI_DW_BT1_DIRMAP
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (mem) {
+		dwsbt1->map = devm_ioremap_resource(&pdev->dev, mem);
+		if (!IS_ERR(dwsbt1->map)) {
+			dwsbt1->map_len = (mem->end - mem->start + 1);
+			dws->mem_ops.dirmap_create = dw_spi_bt1_dirmap_create;
+			dws->mem_ops.dirmap_read = dw_spi_bt1_dirmap_read;
+		} else {
+			dwsbt1->map = NULL;
+		}
+	}
+#endif /* CONFIG_SPI_DW_BT1_DIRMAP */
+
+	/*
+	 * There is no IRQ, no DMA and just one CS available on the System Boot
+	 * SPI controller.
+	 */
+	dws->irq = IRQ_NOTCONNECTED;
+	dws->num_cs = 1;
+
+	/*
+	 * Baikal-T1 System Boot SPI Controller doesn't keep up with the full
+	 * SPI bus speed due to relatively slow APB bus and races for it'
+	 * resources from different CPUs. The situation is worsen by a small
+	 * FIFOs depth (just 8 words). It works better in a single CPU mode
+	 * though, but still tends to be not fast enough at low CPU
+	 * frequencies.
+	 */
+	if (num_possible_cpus() > 1)
+		dws->max_mem_freq = 10000000U;
+	else
+		dws->max_mem_freq = 20000000U;
+
+	return 0;
+}
+
+static int dw_spi_bt1_probe(struct platform_device *pdev)
+{
+	dw_spi_bt1_init_cb init_func;
+	struct dw_spi_bt1 *dwsbt1;
+	struct resource *mem;
+	struct dw_spi *dws;
+	int ret;
+
+	dwsbt1 = devm_kzalloc(&pdev->dev, sizeof(struct dw_spi_bt1), GFP_KERNEL);
+	if (!dwsbt1)
+		return -ENOMEM;
+
+	dws = &dwsbt1->dws;
+
+	dws->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &mem);
+	if (IS_ERR(dws->regs))
+		return PTR_ERR(dws->regs);
+
+	dws->paddr = mem->start;
+
+	dwsbt1->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(dwsbt1->clk))
+		return PTR_ERR(dwsbt1->clk);
+
+	ret = clk_prepare_enable(dwsbt1->clk);
+	if (ret)
+		return ret;
+
+	dws->bus_num = pdev->id;
+	dws->reg_io_width = 4;
+	dws->max_freq = clk_get_rate(dwsbt1->clk);
+	if (!dws->max_freq)
+		goto err_disable_clk;
+
+	init_func = device_get_match_data(&pdev->dev);
+	ret = init_func(pdev, dwsbt1);
+	if (ret)
+		goto err_disable_clk;
+
+	pm_runtime_enable(&pdev->dev);
+
+	ret = dw_spi_add_host(&pdev->dev, dws);
+	if (ret)
+		goto err_disable_clk;
+
+	platform_set_drvdata(pdev, dwsbt1);
+
+	return 0;
+
+err_disable_clk:
+	clk_disable_unprepare(dwsbt1->clk);
+
+	return ret;
+}
+
+static int dw_spi_bt1_remove(struct platform_device *pdev)
+{
+	struct dw_spi_bt1 *dwsbt1 = platform_get_drvdata(pdev);
+
+	dw_spi_remove_host(&dwsbt1->dws);
+
+	pm_runtime_disable(&pdev->dev);
+
+	clk_disable_unprepare(dwsbt1->clk);
+
+	return 0;
+}
+
+static const struct of_device_id dw_spi_bt1_of_match[] = {
+	{ .compatible = "baikal,bt1-ssi", .data = dw_spi_bt1_std_init},
+	{ .compatible = "baikal,bt1-sys-ssi", .data = dw_spi_bt1_sys_init},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, dw_spi_bt1_of_match);
+
+static struct platform_driver dw_spi_bt1_driver = {
+	.probe	= dw_spi_bt1_probe,
+	.remove	= dw_spi_bt1_remove,
+	.driver	= {
+		.name		= "bt1-sys-ssi",
+		.of_match_table	= dw_spi_bt1_of_match,
+	},
+};
+module_platform_driver(dw_spi_bt1_driver);
+
+MODULE_AUTHOR("Serge Semin <Sergey.Semin@baikalelectronics.ru>");
+MODULE_DESCRIPTION("Baikal-T1 System Boot SPI Controller driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-dw-core.c b/drivers/spi/spi-dw-core.c
index 323c66c..2e50cc0 100644
--- a/drivers/spi/spi-dw-core.c
+++ b/drivers/spi/spi-dw-core.c
@@ -8,10 +8,14 @@
 #include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/preempt.h>
 #include <linux/highmem.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+#include <linux/string.h>
+#include <linux/of.h>
 
 #include "spi-dw.h"
 
@@ -19,13 +23,10 @@
 #include <linux/debugfs.h>
 #endif
 
-/* Slave spi_dev related */
+/* Slave spi_device related */
 struct chip_data {
-	u8 tmode;		/* TR/TO/RO/EEPROM */
-	u8 type;		/* SPI/SSP/MicroWire */
-
-	u16 clk_div;		/* baud rate divider */
-	u32 speed_hz;		/* baud rate */
+	u32 cr0;
+	u32 rx_sample_dly;	/* RX sample delay */
 };
 
 #ifdef CONFIG_DEBUG_FS
@@ -52,6 +53,7 @@
 	DW_SPI_DBGFS_REG("DMACR", DW_SPI_DMACR),
 	DW_SPI_DBGFS_REG("DMATDLR", DW_SPI_DMATDLR),
 	DW_SPI_DBGFS_REG("DMARDLR", DW_SPI_DMARDLR),
+	DW_SPI_DBGFS_REG("RX_SAMPLE_DLY", DW_SPI_RX_SAMPLE_DLY),
 };
 
 static int dw_spi_debugfs_init(struct dw_spi *dws)
@@ -101,7 +103,7 @@
 	 */
 	if (cs_high == enable)
 		dw_writel(dws, DW_SPI_SER, BIT(spi->chip_select));
-	else if (dws->cs_override)
+	else
 		dw_writel(dws, DW_SPI_SER, 0);
 }
 EXPORT_SYMBOL_GPL(dw_spi_set_cs);
@@ -109,9 +111,8 @@
 /* Return the max entries we can fill into tx fifo */
 static inline u32 tx_max(struct dw_spi *dws)
 {
-	u32 tx_left, tx_room, rxtx_gap;
+	u32 tx_room, rxtx_gap;
 
-	tx_left = (dws->tx_end - dws->tx) / dws->n_bytes;
 	tx_room = dws->fifo_len - dw_readl(dws, DW_SPI_TXFLR);
 
 	/*
@@ -122,93 +123,124 @@
 	 * shift registers. So a control from sw point of
 	 * view is taken.
 	 */
-	rxtx_gap =  ((dws->rx_end - dws->rx) - (dws->tx_end - dws->tx))
-			/ dws->n_bytes;
+	rxtx_gap = dws->fifo_len - (dws->rx_len - dws->tx_len);
 
-	return min3(tx_left, tx_room, (u32) (dws->fifo_len - rxtx_gap));
+	return min3((u32)dws->tx_len, tx_room, rxtx_gap);
 }
 
 /* Return the max entries we should read out of rx fifo */
 static inline u32 rx_max(struct dw_spi *dws)
 {
-	u32 rx_left = (dws->rx_end - dws->rx) / dws->n_bytes;
-
-	return min_t(u32, rx_left, dw_readl(dws, DW_SPI_RXFLR));
+	return min_t(u32, dws->rx_len, dw_readl(dws, DW_SPI_RXFLR));
 }
 
 static void dw_writer(struct dw_spi *dws)
 {
-	u32 max;
+	u32 max = tx_max(dws);
 	u16 txw = 0;
 
-	spin_lock(&dws->buf_lock);
-	max = tx_max(dws);
 	while (max--) {
-		/* Set the tx word if the transfer's original "tx" is not null */
-		if (dws->tx_end - dws->len) {
+		if (dws->tx) {
 			if (dws->n_bytes == 1)
 				txw = *(u8 *)(dws->tx);
 			else
 				txw = *(u16 *)(dws->tx);
+
+			dws->tx += dws->n_bytes;
 		}
 		dw_write_io_reg(dws, DW_SPI_DR, txw);
-		dws->tx += dws->n_bytes;
+		--dws->tx_len;
 	}
-	spin_unlock(&dws->buf_lock);
 }
 
 static void dw_reader(struct dw_spi *dws)
 {
-	u32 max;
+	u32 max = rx_max(dws);
 	u16 rxw;
 
-	spin_lock(&dws->buf_lock);
-	max = rx_max(dws);
 	while (max--) {
 		rxw = dw_read_io_reg(dws, DW_SPI_DR);
-		/* Care rx only if the transfer's original "rx" is not null */
-		if (dws->rx_end - dws->len) {
+		if (dws->rx) {
 			if (dws->n_bytes == 1)
 				*(u8 *)(dws->rx) = rxw;
 			else
 				*(u16 *)(dws->rx) = rxw;
+
+			dws->rx += dws->n_bytes;
 		}
-		dws->rx += dws->n_bytes;
+		--dws->rx_len;
 	}
-	spin_unlock(&dws->buf_lock);
 }
 
-static void int_error_stop(struct dw_spi *dws, const char *msg)
+int dw_spi_check_status(struct dw_spi *dws, bool raw)
 {
-	spi_reset_chip(dws);
+	u32 irq_status;
+	int ret = 0;
 
-	dev_err(&dws->master->dev, "%s\n", msg);
-	dws->master->cur_msg->status = -EIO;
-	spi_finalize_current_transfer(dws->master);
+	if (raw)
+		irq_status = dw_readl(dws, DW_SPI_RISR);
+	else
+		irq_status = dw_readl(dws, DW_SPI_ISR);
+
+	if (irq_status & SPI_INT_RXOI) {
+		dev_err(&dws->master->dev, "RX FIFO overflow detected\n");
+		ret = -EIO;
+	}
+
+	if (irq_status & SPI_INT_RXUI) {
+		dev_err(&dws->master->dev, "RX FIFO underflow detected\n");
+		ret = -EIO;
+	}
+
+	if (irq_status & SPI_INT_TXOI) {
+		dev_err(&dws->master->dev, "TX FIFO overflow detected\n");
+		ret = -EIO;
+	}
+
+	/* Generically handle the erroneous situation */
+	if (ret) {
+		spi_reset_chip(dws);
+		if (dws->master->cur_msg)
+			dws->master->cur_msg->status = ret;
+	}
+
+	return ret;
 }
+EXPORT_SYMBOL_GPL(dw_spi_check_status);
 
-static irqreturn_t interrupt_transfer(struct dw_spi *dws)
+static irqreturn_t dw_spi_transfer_handler(struct dw_spi *dws)
 {
 	u16 irq_status = dw_readl(dws, DW_SPI_ISR);
 
-	/* Error handling */
-	if (irq_status & (SPI_INT_TXOI | SPI_INT_RXOI | SPI_INT_RXUI)) {
-		dw_readl(dws, DW_SPI_ICR);
-		int_error_stop(dws, "interrupt_transfer: fifo overrun/underrun");
-		return IRQ_HANDLED;
-	}
-
-	dw_reader(dws);
-	if (dws->rx_end == dws->rx) {
-		spi_mask_intr(dws, SPI_INT_TXEI);
+	if (dw_spi_check_status(dws, false)) {
 		spi_finalize_current_transfer(dws->master);
 		return IRQ_HANDLED;
 	}
+
+	/*
+	 * Read data from the Rx FIFO every time we've got a chance executing
+	 * this method. If there is nothing left to receive, terminate the
+	 * procedure. Otherwise adjust the Rx FIFO Threshold level if it's a
+	 * final stage of the transfer. By doing so we'll get the next IRQ
+	 * right when the leftover incoming data is received.
+	 */
+	dw_reader(dws);
+	if (!dws->rx_len) {
+		spi_mask_intr(dws, 0xff);
+		spi_finalize_current_transfer(dws->master);
+	} else if (dws->rx_len <= dw_readl(dws, DW_SPI_RXFTLR)) {
+		dw_writel(dws, DW_SPI_RXFTLR, dws->rx_len - 1);
+	}
+
+	/*
+	 * Send data out if Tx FIFO Empty IRQ is received. The IRQ will be
+	 * disabled after the data transmission is finished so not to
+	 * have the TXE IRQ flood at the final stage of the transfer.
+	 */
 	if (irq_status & SPI_INT_TXEI) {
-		spi_mask_intr(dws, SPI_INT_TXEI);
 		dw_writer(dws);
-		/* Enable TX irq always, it will be disabled when RX finished */
-		spi_umask_intr(dws, SPI_INT_TXEI);
+		if (!dws->tx_len)
+			spi_mask_intr(dws, SPI_INT_TXEI);
 	}
 
 	return IRQ_HANDLED;
@@ -224,105 +256,176 @@
 		return IRQ_NONE;
 
 	if (!master->cur_msg) {
-		spi_mask_intr(dws, SPI_INT_TXEI);
+		spi_mask_intr(dws, 0xff);
 		return IRQ_HANDLED;
 	}
 
 	return dws->transfer_handler(dws);
 }
 
-/* Configure CTRLR0 for DW_apb_ssi */
-u32 dw_spi_update_cr0(struct spi_controller *master, struct spi_device *spi,
-		      struct spi_transfer *transfer)
+static u32 dw_spi_prepare_cr0(struct dw_spi *dws, struct spi_device *spi)
 {
-	struct chip_data *chip = spi_get_ctldata(spi);
-	u32 cr0;
+	u32 cr0 = 0;
 
-	/* Default SPI mode is SCPOL = 0, SCPH = 0 */
-	cr0 = (transfer->bits_per_word - 1)
-		| (chip->type << SPI_FRF_OFFSET)
-		| ((((spi->mode & SPI_CPOL) ? 1 : 0) << SPI_SCOL_OFFSET) |
-		   (((spi->mode & SPI_CPHA) ? 1 : 0) << SPI_SCPH_OFFSET) |
-		   (((spi->mode & SPI_LOOP) ? 1 : 0) << SPI_SRL_OFFSET))
-		| (chip->tmode << SPI_TMOD_OFFSET);
+	if (!(dws->caps & DW_SPI_CAP_DWC_SSI)) {
+		/* CTRLR0[ 5: 4] Frame Format */
+		cr0 |= SSI_MOTO_SPI << SPI_FRF_OFFSET;
+
+		/*
+		 * SPI mode (SCPOL|SCPH)
+		 * CTRLR0[ 6] Serial Clock Phase
+		 * CTRLR0[ 7] Serial Clock Polarity
+		 */
+		cr0 |= ((spi->mode & SPI_CPOL) ? 1 : 0) << SPI_SCOL_OFFSET;
+		cr0 |= ((spi->mode & SPI_CPHA) ? 1 : 0) << SPI_SCPH_OFFSET;
+
+		/* CTRLR0[11] Shift Register Loop */
+		cr0 |= ((spi->mode & SPI_LOOP) ? 1 : 0) << SPI_SRL_OFFSET;
+	} else {
+		/* CTRLR0[ 7: 6] Frame Format */
+		cr0 |= SSI_MOTO_SPI << DWC_SSI_CTRLR0_FRF_OFFSET;
+
+		/*
+		 * SPI mode (SCPOL|SCPH)
+		 * CTRLR0[ 8] Serial Clock Phase
+		 * CTRLR0[ 9] Serial Clock Polarity
+		 */
+		cr0 |= ((spi->mode & SPI_CPOL) ? 1 : 0) << DWC_SSI_CTRLR0_SCPOL_OFFSET;
+		cr0 |= ((spi->mode & SPI_CPHA) ? 1 : 0) << DWC_SSI_CTRLR0_SCPH_OFFSET;
+
+		/* CTRLR0[13] Shift Register Loop */
+		cr0 |= ((spi->mode & SPI_LOOP) ? 1 : 0) << DWC_SSI_CTRLR0_SRL_OFFSET;
+
+		if (dws->caps & DW_SPI_CAP_KEEMBAY_MST)
+			cr0 |= DWC_SSI_CTRLR0_KEEMBAY_MST;
+	}
 
 	return cr0;
 }
-EXPORT_SYMBOL_GPL(dw_spi_update_cr0);
 
-/* Configure CTRLR0 for DWC_ssi */
-u32 dw_spi_update_cr0_v1_01a(struct spi_controller *master,
-			     struct spi_device *spi,
-			     struct spi_transfer *transfer)
+void dw_spi_update_config(struct dw_spi *dws, struct spi_device *spi,
+			  struct dw_spi_cfg *cfg)
 {
 	struct chip_data *chip = spi_get_ctldata(spi);
-	u32 cr0;
+	u32 cr0 = chip->cr0;
+	u32 speed_hz;
+	u16 clk_div;
 
-	/* CTRLR0[ 4: 0] Data Frame Size */
-	cr0 = (transfer->bits_per_word - 1);
+	/* CTRLR0[ 4/3: 0] Data Frame Size */
+	cr0 |= (cfg->dfs - 1);
 
-	/* CTRLR0[ 7: 6] Frame Format */
-	cr0 |= chip->type << DWC_SSI_CTRLR0_FRF_OFFSET;
+	if (!(dws->caps & DW_SPI_CAP_DWC_SSI))
+		/* CTRLR0[ 9:8] Transfer Mode */
+		cr0 |= cfg->tmode << SPI_TMOD_OFFSET;
+	else
+		/* CTRLR0[11:10] Transfer Mode */
+		cr0 |= cfg->tmode << DWC_SSI_CTRLR0_TMOD_OFFSET;
+
+	dw_writel(dws, DW_SPI_CTRLR0, cr0);
+
+	if (cfg->tmode == SPI_TMOD_EPROMREAD || cfg->tmode == SPI_TMOD_RO)
+		dw_writel(dws, DW_SPI_CTRLR1, cfg->ndf ? cfg->ndf - 1 : 0);
+
+	/* Note DW APB SSI clock divider doesn't support odd numbers */
+	clk_div = (DIV_ROUND_UP(dws->max_freq, cfg->freq) + 1) & 0xfffe;
+	speed_hz = dws->max_freq / clk_div;
+
+	if (dws->current_freq != speed_hz) {
+		spi_set_clk(dws, clk_div);
+		dws->current_freq = speed_hz;
+	}
+
+	/* Update RX sample delay if required */
+	if (dws->cur_rx_sample_dly != chip->rx_sample_dly) {
+		dw_writel(dws, DW_SPI_RX_SAMPLE_DLY, chip->rx_sample_dly);
+		dws->cur_rx_sample_dly = chip->rx_sample_dly;
+	}
+}
+EXPORT_SYMBOL_GPL(dw_spi_update_config);
+
+static void dw_spi_irq_setup(struct dw_spi *dws)
+{
+	u16 level;
+	u8 imask;
 
 	/*
-	 * SPI mode (SCPOL|SCPH)
-	 * CTRLR0[ 8] Serial Clock Phase
-	 * CTRLR0[ 9] Serial Clock Polarity
+	 * Originally Tx and Rx data lengths match. Rx FIFO Threshold level
+	 * will be adjusted at the final stage of the IRQ-based SPI transfer
+	 * execution so not to lose the leftover of the incoming data.
 	 */
-	cr0 |= ((spi->mode & SPI_CPOL) ? 1 : 0) << DWC_SSI_CTRLR0_SCPOL_OFFSET;
-	cr0 |= ((spi->mode & SPI_CPHA) ? 1 : 0) << DWC_SSI_CTRLR0_SCPH_OFFSET;
+	level = min_t(u16, dws->fifo_len / 2, dws->tx_len);
+	dw_writel(dws, DW_SPI_TXFTLR, level);
+	dw_writel(dws, DW_SPI_RXFTLR, level - 1);
 
-	/* CTRLR0[11:10] Transfer Mode */
-	cr0 |= chip->tmode << DWC_SSI_CTRLR0_TMOD_OFFSET;
+	imask = SPI_INT_TXEI | SPI_INT_TXOI | SPI_INT_RXUI | SPI_INT_RXOI |
+		SPI_INT_RXFI;
+	spi_umask_intr(dws, imask);
 
-	/* CTRLR0[13] Shift Register Loop */
-	cr0 |= ((spi->mode & SPI_LOOP) ? 1 : 0) << DWC_SSI_CTRLR0_SRL_OFFSET;
-
-	return cr0;
+	dws->transfer_handler = dw_spi_transfer_handler;
 }
-EXPORT_SYMBOL_GPL(dw_spi_update_cr0_v1_01a);
+
+/*
+ * The iterative procedure of the poll-based transfer is simple: write as much
+ * as possible to the Tx FIFO, wait until the pending to receive data is ready
+ * to be read, read it from the Rx FIFO and check whether the performed
+ * procedure has been successful.
+ *
+ * Note this method the same way as the IRQ-based transfer won't work well for
+ * the SPI devices connected to the controller with native CS due to the
+ * automatic CS assertion/de-assertion.
+ */
+static int dw_spi_poll_transfer(struct dw_spi *dws,
+				struct spi_transfer *transfer)
+{
+	struct spi_delay delay;
+	u16 nbits;
+	int ret;
+
+	delay.unit = SPI_DELAY_UNIT_SCK;
+	nbits = dws->n_bytes * BITS_PER_BYTE;
+
+	do {
+		dw_writer(dws);
+
+		delay.value = nbits * (dws->rx_len - dws->tx_len);
+		spi_delay_exec(&delay, transfer);
+
+		dw_reader(dws);
+
+		ret = dw_spi_check_status(dws, true);
+		if (ret)
+			return ret;
+	} while (dws->rx_len);
+
+	return 0;
+}
 
 static int dw_spi_transfer_one(struct spi_controller *master,
 		struct spi_device *spi, struct spi_transfer *transfer)
 {
 	struct dw_spi *dws = spi_controller_get_devdata(master);
-	struct chip_data *chip = spi_get_ctldata(spi);
-	unsigned long flags;
-	u8 imask = 0;
-	u16 txlevel = 0;
-	u32 cr0;
+	struct dw_spi_cfg cfg = {
+		.tmode = SPI_TMOD_TR,
+		.dfs = transfer->bits_per_word,
+		.freq = transfer->speed_hz,
+	};
 	int ret;
 
 	dws->dma_mapped = 0;
-	spin_lock_irqsave(&dws->buf_lock, flags);
+	dws->n_bytes = DIV_ROUND_UP(transfer->bits_per_word, BITS_PER_BYTE);
 	dws->tx = (void *)transfer->tx_buf;
-	dws->tx_end = dws->tx + transfer->len;
+	dws->tx_len = transfer->len / dws->n_bytes;
 	dws->rx = transfer->rx_buf;
-	dws->rx_end = dws->rx + transfer->len;
-	dws->len = transfer->len;
-	spin_unlock_irqrestore(&dws->buf_lock, flags);
+	dws->rx_len = dws->tx_len;
 
-	/* Ensure dw->rx and dw->rx_end are visible */
+	/* Ensure the data above is visible for all CPUs */
 	smp_mb();
 
 	spi_enable_chip(dws, 0);
 
-	/* Handle per transfer options for bpw and speed */
-	if (transfer->speed_hz != dws->current_freq) {
-		if (transfer->speed_hz != chip->speed_hz) {
-			/* clk_div doesn't support odd number */
-			chip->clk_div = (DIV_ROUND_UP(dws->max_freq, transfer->speed_hz) + 1) & 0xfffe;
-			chip->speed_hz = transfer->speed_hz;
-		}
-		dws->current_freq = transfer->speed_hz;
-		spi_set_clk(dws, chip->clk_div);
-	}
+	dw_spi_update_config(dws, spi, &cfg);
 
-	transfer->effective_speed_hz = dws->max_freq / chip->clk_div;
-	dws->n_bytes = DIV_ROUND_UP(transfer->bits_per_word, BITS_PER_BYTE);
-
-	cr0 = dws->update_cr0(master, spi, transfer);
-	dw_writel(dws, DW_SPI_CTRLR0, cr0);
+	transfer->effective_speed_hz = dws->current_freq;
 
 	/* Check if current transfer is a DMA transaction */
 	if (master->can_dma && master->can_dma(master, spi, transfer))
@@ -331,32 +434,20 @@
 	/* For poll mode just disable all interrupts */
 	spi_mask_intr(dws, 0xff);
 
-	/*
-	 * Interrupt mode
-	 * we only need set the TXEI IRQ, as TX/RX always happen syncronizely
-	 */
 	if (dws->dma_mapped) {
 		ret = dws->dma_ops->dma_setup(dws, transfer);
-		if (ret < 0) {
-			spi_enable_chip(dws, 1);
+		if (ret)
 			return ret;
-		}
-	} else {
-		txlevel = min_t(u16, dws->fifo_len / 2, dws->len / dws->n_bytes);
-		dw_writel(dws, DW_SPI_TXFTLR, txlevel);
-
-		/* Set the interrupt mask */
-		imask |= SPI_INT_TXEI | SPI_INT_TXOI |
-			 SPI_INT_RXUI | SPI_INT_RXOI;
-		spi_umask_intr(dws, imask);
-
-		dws->transfer_handler = interrupt_transfer;
 	}
 
 	spi_enable_chip(dws, 1);
 
 	if (dws->dma_mapped)
 		return dws->dma_ops->dma_transfer(dws, transfer);
+	else if (dws->irq == IRQ_NOTCONNECTED)
+		return dw_spi_poll_transfer(dws, transfer);
+
+	dw_spi_irq_setup(dws);
 
 	return 1;
 }
@@ -372,21 +463,336 @@
 	spi_reset_chip(dws);
 }
 
+static int dw_spi_adjust_mem_op_size(struct spi_mem *mem, struct spi_mem_op *op)
+{
+	if (op->data.dir == SPI_MEM_DATA_IN)
+		op->data.nbytes = clamp_val(op->data.nbytes, 0, SPI_NDF_MASK + 1);
+
+	return 0;
+}
+
+static bool dw_spi_supports_mem_op(struct spi_mem *mem,
+				   const struct spi_mem_op *op)
+{
+	if (op->data.buswidth > 1 || op->addr.buswidth > 1 ||
+	    op->dummy.buswidth > 1 || op->cmd.buswidth > 1)
+		return false;
+
+	return spi_mem_default_supports_op(mem, op);
+}
+
+static int dw_spi_init_mem_buf(struct dw_spi *dws, const struct spi_mem_op *op)
+{
+	unsigned int i, j, len;
+	u8 *out;
+
+	/*
+	 * Calculate the total length of the EEPROM command transfer and
+	 * either use the pre-allocated buffer or create a temporary one.
+	 */
+	len = op->cmd.nbytes + op->addr.nbytes + op->dummy.nbytes;
+	if (op->data.dir == SPI_MEM_DATA_OUT)
+		len += op->data.nbytes;
+
+	if (len <= SPI_BUF_SIZE) {
+		out = dws->buf;
+	} else {
+		out = kzalloc(len, GFP_KERNEL);
+		if (!out)
+			return -ENOMEM;
+	}
+
+	/*
+	 * Collect the operation code, address and dummy bytes into the single
+	 * buffer. If it's a transfer with data to be sent, also copy it into the
+	 * single buffer in order to speed the data transmission up.
+	 */
+	for (i = 0; i < op->cmd.nbytes; ++i)
+		out[i] = SPI_GET_BYTE(op->cmd.opcode, op->cmd.nbytes - i - 1);
+	for (j = 0; j < op->addr.nbytes; ++i, ++j)
+		out[i] = SPI_GET_BYTE(op->addr.val, op->addr.nbytes - j - 1);
+	for (j = 0; j < op->dummy.nbytes; ++i, ++j)
+		out[i] = 0x0;
+
+	if (op->data.dir == SPI_MEM_DATA_OUT)
+		memcpy(&out[i], op->data.buf.out, op->data.nbytes);
+
+	dws->n_bytes = 1;
+	dws->tx = out;
+	dws->tx_len = len;
+	if (op->data.dir == SPI_MEM_DATA_IN) {
+		dws->rx = op->data.buf.in;
+		dws->rx_len = op->data.nbytes;
+	} else {
+		dws->rx = NULL;
+		dws->rx_len = 0;
+	}
+
+	return 0;
+}
+
+static void dw_spi_free_mem_buf(struct dw_spi *dws)
+{
+	if (dws->tx != dws->buf)
+		kfree(dws->tx);
+}
+
+static int dw_spi_write_then_read(struct dw_spi *dws, struct spi_device *spi)
+{
+	u32 room, entries, sts;
+	unsigned int len;
+	u8 *buf;
+
+	/*
+	 * At initial stage we just pre-fill the Tx FIFO in with no rush,
+	 * since native CS hasn't been enabled yet and the automatic data
+	 * transmission won't start til we do that.
+	 */
+	len = min(dws->fifo_len, dws->tx_len);
+	buf = dws->tx;
+	while (len--)
+		dw_write_io_reg(dws, DW_SPI_DR, *buf++);
+
+	/*
+	 * After setting any bit in the SER register the transmission will
+	 * start automatically. We have to keep up with that procedure
+	 * otherwise the CS de-assertion will happen whereupon the memory
+	 * operation will be pre-terminated.
+	 */
+	len = dws->tx_len - ((void *)buf - dws->tx);
+	dw_spi_set_cs(spi, false);
+	while (len) {
+		entries = readl_relaxed(dws->regs + DW_SPI_TXFLR);
+		if (!entries) {
+			dev_err(&dws->master->dev, "CS de-assertion on Tx\n");
+			return -EIO;
+		}
+		room = min(dws->fifo_len - entries, len);
+		for (; room; --room, --len)
+			dw_write_io_reg(dws, DW_SPI_DR, *buf++);
+	}
+
+	/*
+	 * Data fetching will start automatically if the EEPROM-read mode is
+	 * activated. We have to keep up with the incoming data pace to
+	 * prevent the Rx FIFO overflow causing the inbound data loss.
+	 */
+	len = dws->rx_len;
+	buf = dws->rx;
+	while (len) {
+		entries = readl_relaxed(dws->regs + DW_SPI_RXFLR);
+		if (!entries) {
+			sts = readl_relaxed(dws->regs + DW_SPI_RISR);
+			if (sts & SPI_INT_RXOI) {
+				dev_err(&dws->master->dev, "FIFO overflow on Rx\n");
+				return -EIO;
+			}
+			continue;
+		}
+		entries = min(entries, len);
+		for (; entries; --entries, --len)
+			*buf++ = dw_read_io_reg(dws, DW_SPI_DR);
+	}
+
+	return 0;
+}
+
+static inline bool dw_spi_ctlr_busy(struct dw_spi *dws)
+{
+	return dw_readl(dws, DW_SPI_SR) & SR_BUSY;
+}
+
+static int dw_spi_wait_mem_op_done(struct dw_spi *dws)
+{
+	int retry = SPI_WAIT_RETRIES;
+	struct spi_delay delay;
+	unsigned long ns, us;
+	u32 nents;
+
+	nents = dw_readl(dws, DW_SPI_TXFLR);
+	ns = NSEC_PER_SEC / dws->current_freq * nents;
+	ns *= dws->n_bytes * BITS_PER_BYTE;
+	if (ns <= NSEC_PER_USEC) {
+		delay.unit = SPI_DELAY_UNIT_NSECS;
+		delay.value = ns;
+	} else {
+		us = DIV_ROUND_UP(ns, NSEC_PER_USEC);
+		delay.unit = SPI_DELAY_UNIT_USECS;
+		delay.value = clamp_val(us, 0, USHRT_MAX);
+	}
+
+	while (dw_spi_ctlr_busy(dws) && retry--)
+		spi_delay_exec(&delay, NULL);
+
+	if (retry < 0) {
+		dev_err(&dws->master->dev, "Mem op hanged up\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static void dw_spi_stop_mem_op(struct dw_spi *dws, struct spi_device *spi)
+{
+	spi_enable_chip(dws, 0);
+	dw_spi_set_cs(spi, true);
+	spi_enable_chip(dws, 1);
+}
+
+/*
+ * The SPI memory operation implementation below is the best choice for the
+ * devices, which are selected by the native chip-select lane. It's
+ * specifically developed to workaround the problem with automatic chip-select
+ * lane toggle when there is no data in the Tx FIFO buffer. Luckily the current
+ * SPI-mem core calls exec_op() callback only if the GPIO-based CS is
+ * unavailable.
+ */
+static int dw_spi_exec_mem_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+	struct dw_spi *dws = spi_controller_get_devdata(mem->spi->controller);
+	struct dw_spi_cfg cfg;
+	unsigned long flags;
+	int ret;
+
+	/*
+	 * Collect the outbound data into a single buffer to speed the
+	 * transmission up at least on the initial stage.
+	 */
+	ret = dw_spi_init_mem_buf(dws, op);
+	if (ret)
+		return ret;
+
+	/*
+	 * DW SPI EEPROM-read mode is required only for the SPI memory Data-IN
+	 * operation. Transmit-only mode is suitable for the rest of them.
+	 */
+	cfg.dfs = 8;
+	cfg.freq = clamp(mem->spi->max_speed_hz, 0U, dws->max_mem_freq);
+	if (op->data.dir == SPI_MEM_DATA_IN) {
+		cfg.tmode = SPI_TMOD_EPROMREAD;
+		cfg.ndf = op->data.nbytes;
+	} else {
+		cfg.tmode = SPI_TMOD_TO;
+	}
+
+	spi_enable_chip(dws, 0);
+
+	dw_spi_update_config(dws, mem->spi, &cfg);
+
+	spi_mask_intr(dws, 0xff);
+
+	spi_enable_chip(dws, 1);
+
+	/*
+	 * DW APB SSI controller has very nasty peculiarities. First originally
+	 * (without any vendor-specific modifications) it doesn't provide a
+	 * direct way to set and clear the native chip-select signal. Instead
+	 * the controller asserts the CS lane if Tx FIFO isn't empty and a
+	 * transmission is going on, and automatically de-asserts it back to
+	 * the high level if the Tx FIFO doesn't have anything to be pushed
+	 * out. Due to that a multi-tasking or heavy IRQs activity might be
+	 * fatal, since the transfer procedure preemption may cause the Tx FIFO
+	 * getting empty and sudden CS de-assertion, which in the middle of the
+	 * transfer will most likely cause the data loss. Secondly the
+	 * EEPROM-read or Read-only DW SPI transfer modes imply the incoming
+	 * data being automatically pulled in into the Rx FIFO. So if the
+	 * driver software is late in fetching the data from the FIFO before
+	 * it's overflown, new incoming data will be lost. In order to make
+	 * sure the executed memory operations are CS-atomic and to prevent the
+	 * Rx FIFO overflow we have to disable the local interrupts so to block
+	 * any preemption during the subsequent IO operations.
+	 *
+	 * Note. At some circumstances disabling IRQs may not help to prevent
+	 * the problems described above. The CS de-assertion and Rx FIFO
+	 * overflow may still happen due to the relatively slow system bus or
+	 * CPU not working fast enough, so the write-then-read algo implemented
+	 * here just won't keep up with the SPI bus data transfer. Such
+	 * situation is highly platform specific and is supposed to be fixed by
+	 * manually restricting the SPI bus frequency using the
+	 * dws->max_mem_freq parameter.
+	 */
+	local_irq_save(flags);
+	preempt_disable();
+
+	ret = dw_spi_write_then_read(dws, mem->spi);
+
+	local_irq_restore(flags);
+	preempt_enable();
+
+	/*
+	 * Wait for the operation being finished and check the controller
+	 * status only if there hasn't been any run-time error detected. In the
+	 * former case it's just pointless. In the later one to prevent an
+	 * additional error message printing since any hw error flag being set
+	 * would be due to an error detected on the data transfer.
+	 */
+	if (!ret) {
+		ret = dw_spi_wait_mem_op_done(dws);
+		if (!ret)
+			ret = dw_spi_check_status(dws, true);
+	}
+
+	dw_spi_stop_mem_op(dws, mem->spi);
+
+	dw_spi_free_mem_buf(dws);
+
+	return ret;
+}
+
+/*
+ * Initialize the default memory operations if a glue layer hasn't specified
+ * custom ones. Direct mapping operations will be preserved anyway since DW SPI
+ * controller doesn't have an embedded dirmap interface. Note the memory
+ * operations implemented in this driver is the best choice only for the DW APB
+ * SSI controller with standard native CS functionality. If a hardware vendor
+ * has fixed the automatic CS assertion/de-assertion peculiarity, then it will
+ * be safer to use the normal SPI-messages-based transfers implementation.
+ */
+static void dw_spi_init_mem_ops(struct dw_spi *dws)
+{
+	if (!dws->mem_ops.exec_op && !(dws->caps & DW_SPI_CAP_CS_OVERRIDE) &&
+	    !dws->set_cs) {
+		dws->mem_ops.adjust_op_size = dw_spi_adjust_mem_op_size;
+		dws->mem_ops.supports_op = dw_spi_supports_mem_op;
+		dws->mem_ops.exec_op = dw_spi_exec_mem_op;
+		if (!dws->max_mem_freq)
+			dws->max_mem_freq = dws->max_freq;
+	}
+}
+
 /* This may be called twice for each spi dev */
 static int dw_spi_setup(struct spi_device *spi)
 {
+	struct dw_spi *dws = spi_controller_get_devdata(spi->controller);
 	struct chip_data *chip;
 
 	/* Only alloc on first setup */
 	chip = spi_get_ctldata(spi);
 	if (!chip) {
+		struct dw_spi *dws = spi_controller_get_devdata(spi->controller);
+		u32 rx_sample_dly_ns;
+
 		chip = kzalloc(sizeof(struct chip_data), GFP_KERNEL);
 		if (!chip)
 			return -ENOMEM;
 		spi_set_ctldata(spi, chip);
+		/* Get specific / default rx-sample-delay */
+		if (device_property_read_u32(&spi->dev,
+					     "rx-sample-delay-ns",
+					     &rx_sample_dly_ns) != 0)
+			/* Use default controller value */
+			rx_sample_dly_ns = dws->def_rx_sample_dly_ns;
+		chip->rx_sample_dly = DIV_ROUND_CLOSEST(rx_sample_dly_ns,
+							NSEC_PER_SEC /
+							dws->max_freq);
 	}
 
-	chip->tmode = SPI_TMOD_TR;
+	/*
+	 * Update CR0 data each time the setup callback is invoked since
+	 * the device parameters could have been changed, for instance, by
+	 * the MMC SPI driver or something else.
+	 */
+	chip->cr0 = dw_spi_prepare_cr0(dws, spi);
 
 	return 0;
 }
@@ -423,7 +829,7 @@
 	}
 
 	/* enable HW fixup for explicit CS deselect for Amazon's alpine chip */
-	if (dws->cs_override)
+	if (dws->caps & DW_SPI_CAP_CS_OVERRIDE)
 		dw_writel(dws, DW_SPI_CS_OVERRIDE, 0xF);
 }
 
@@ -440,19 +846,22 @@
 		return -ENOMEM;
 
 	dws->master = master;
-	dws->type = SSI_MOTO_SPI;
 	dws->dma_addr = (dma_addr_t)(dws->paddr + DW_SPI_DR);
-	spin_lock_init(&dws->buf_lock);
 
 	spi_controller_set_devdata(master, dws);
 
+	/* Basic HW init */
+	spi_hw_init(dev, dws);
+
 	ret = request_irq(dws->irq, dw_spi_irq, IRQF_SHARED, dev_name(dev),
 			  master);
-	if (ret < 0) {
+	if (ret < 0 && ret != -ENOTCONN) {
 		dev_err(dev, "can not get IRQ\n");
 		goto err_free_master;
 	}
 
+	dw_spi_init_mem_ops(dws);
+
 	master->use_gpio_descriptors = true;
 	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LOOP;
 	master->bits_per_word_mask =  SPI_BPW_RANGE_MASK(4, 16);
@@ -460,20 +869,22 @@
 	master->num_chipselect = dws->num_cs;
 	master->setup = dw_spi_setup;
 	master->cleanup = dw_spi_cleanup;
-	master->set_cs = dw_spi_set_cs;
+	if (dws->set_cs)
+		master->set_cs = dws->set_cs;
+	else
+		master->set_cs = dw_spi_set_cs;
 	master->transfer_one = dw_spi_transfer_one;
 	master->handle_err = dw_spi_handle_err;
+	master->mem_ops = &dws->mem_ops;
 	master->max_speed_hz = dws->max_freq;
 	master->dev.of_node = dev->of_node;
 	master->dev.fwnode = dev->fwnode;
 	master->flags = SPI_MASTER_GPIO_SS;
 	master->auto_runtime_pm = true;
 
-	if (dws->set_cs)
-		master->set_cs = dws->set_cs;
-
-	/* Basic HW init */
-	spi_hw_init(dev, dws);
+	/* Get default rx sample delay */
+	device_property_read_u32(dev, "rx-sample-delay-ns",
+				 &dws->def_rx_sample_dly_ns);
 
 	if (dws->dma_ops && dws->dma_ops->dma_init) {
 		ret = dws->dma_ops->dma_init(dev, dws);
diff --git a/drivers/spi/spi-dw-dma.c b/drivers/spi/spi-dw-dma.c
index bb390ff..a09831c 100644
--- a/drivers/spi/spi-dw-dma.c
+++ b/drivers/spi/spi-dw-dma.c
@@ -17,7 +17,6 @@
 
 #include "spi-dw.h"
 
-#define WAIT_RETRIES	5
 #define RX_BUSY		0
 #define RX_BURST_LEVEL	16
 #define TX_BUSY		1
@@ -49,6 +48,7 @@
 		max_burst = RX_BURST_LEVEL;
 
 	dws->rxburst = min(max_burst, def_burst);
+	dw_writel(dws, DW_SPI_DMARDLR, dws->rxburst - 1);
 
 	ret = dma_get_slave_caps(dws->txchan, &caps);
 	if (!ret && caps.max_burst)
@@ -56,7 +56,36 @@
 	else
 		max_burst = TX_BURST_LEVEL;
 
+	/*
+	 * Having a Rx DMA channel serviced with higher priority than a Tx DMA
+	 * channel might not be enough to provide a well balanced DMA-based
+	 * SPI transfer interface. There might still be moments when the Tx DMA
+	 * channel is occasionally handled faster than the Rx DMA channel.
+	 * That in its turn will eventually cause the SPI Rx FIFO overflow if
+	 * SPI bus speed is high enough to fill the SPI Rx FIFO in before it's
+	 * cleared by the Rx DMA channel. In order to fix the problem the Tx
+	 * DMA activity is intentionally slowed down by limiting the SPI Tx
+	 * FIFO depth with a value twice bigger than the Tx burst length.
+	 */
 	dws->txburst = min(max_burst, def_burst);
+	dw_writel(dws, DW_SPI_DMATDLR, dws->txburst);
+}
+
+static void dw_spi_dma_sg_burst_init(struct dw_spi *dws)
+{
+	struct dma_slave_caps tx = {0}, rx = {0};
+
+	dma_get_slave_caps(dws->txchan, &tx);
+	dma_get_slave_caps(dws->rxchan, &rx);
+
+	if (tx.max_sg_burst > 0 && rx.max_sg_burst > 0)
+		dws->dma_sg_burst = min(tx.max_sg_burst, rx.max_sg_burst);
+	else if (tx.max_sg_burst > 0)
+		dws->dma_sg_burst = tx.max_sg_burst;
+	else if (rx.max_sg_burst > 0)
+		dws->dma_sg_burst = rx.max_sg_burst;
+	else
+		dws->dma_sg_burst = 0;
 }
 
 static int dw_spi_dma_init_mfld(struct device *dev, struct dw_spi *dws)
@@ -96,6 +125,8 @@
 
 	dw_spi_dma_maxburst_init(dws);
 
+	dw_spi_dma_sg_burst_init(dws);
+
 	return 0;
 
 free_rxchan:
@@ -125,6 +156,8 @@
 
 	dw_spi_dma_maxburst_init(dws);
 
+	dw_spi_dma_sg_burst_init(dws);
+
 	return 0;
 }
 
@@ -139,23 +172,14 @@
 		dmaengine_terminate_sync(dws->rxchan);
 		dma_release_channel(dws->rxchan);
 	}
-
-	dw_writel(dws, DW_SPI_DMACR, 0);
 }
 
 static irqreturn_t dw_spi_dma_transfer_handler(struct dw_spi *dws)
 {
-	u16 irq_status = dw_readl(dws, DW_SPI_ISR);
+	dw_spi_check_status(dws, false);
 
-	if (!irq_status)
-		return IRQ_NONE;
-
-	dw_readl(dws, DW_SPI_ICR);
-	spi_reset_chip(dws);
-
-	dev_err(&dws->master->dev, "%s: FIFO overrun/underrun\n", __func__);
-	dws->master->cur_msg->status = -EIO;
 	complete(&dws->dma_completion);
+
 	return IRQ_HANDLED;
 }
 
@@ -177,12 +201,12 @@
 	return DMA_SLAVE_BUSWIDTH_UNDEFINED;
 }
 
-static int dw_spi_dma_wait(struct dw_spi *dws, struct spi_transfer *xfer)
+static int dw_spi_dma_wait(struct dw_spi *dws, unsigned int len, u32 speed)
 {
 	unsigned long long ms;
 
-	ms = xfer->len * MSEC_PER_SEC * BITS_PER_BYTE;
-	do_div(ms, xfer->effective_speed_hz);
+	ms = len * MSEC_PER_SEC * BITS_PER_BYTE;
+	do_div(ms, speed);
 	ms += ms + 200;
 
 	if (ms > UINT_MAX)
@@ -208,7 +232,7 @@
 static int dw_spi_dma_wait_tx_done(struct dw_spi *dws,
 				   struct spi_transfer *xfer)
 {
-	int retry = WAIT_RETRIES;
+	int retry = SPI_WAIT_RETRIES;
 	struct spi_delay delay;
 	u32 nents;
 
@@ -239,18 +263,12 @@
 	if (test_bit(RX_BUSY, &dws->dma_chan_busy))
 		return;
 
-	dw_writel(dws, DW_SPI_DMACR, 0);
 	complete(&dws->dma_completion);
 }
 
-static struct dma_async_tx_descriptor *
-dw_spi_dma_prepare_tx(struct dw_spi *dws, struct spi_transfer *xfer)
+static int dw_spi_dma_config_tx(struct dw_spi *dws)
 {
 	struct dma_slave_config txconf;
-	struct dma_async_tx_descriptor *txdesc;
-
-	if (!xfer->tx_buf)
-		return NULL;
 
 	memset(&txconf, 0, sizeof(txconf));
 	txconf.direction = DMA_MEM_TO_DEV;
@@ -260,20 +278,35 @@
 	txconf.dst_addr_width = dw_spi_dma_convert_width(dws->n_bytes);
 	txconf.device_fc = false;
 
-	dmaengine_slave_config(dws->txchan, &txconf);
+	return dmaengine_slave_config(dws->txchan, &txconf);
+}
 
-	txdesc = dmaengine_prep_slave_sg(dws->txchan,
-				xfer->tx_sg.sgl,
-				xfer->tx_sg.nents,
-				DMA_MEM_TO_DEV,
-				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+static int dw_spi_dma_submit_tx(struct dw_spi *dws, struct scatterlist *sgl,
+				unsigned int nents)
+{
+	struct dma_async_tx_descriptor *txdesc;
+	dma_cookie_t cookie;
+	int ret;
+
+	txdesc = dmaengine_prep_slave_sg(dws->txchan, sgl, nents,
+					 DMA_MEM_TO_DEV,
+					 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	if (!txdesc)
-		return NULL;
+		return -ENOMEM;
 
 	txdesc->callback = dw_spi_dma_tx_done;
 	txdesc->callback_param = dws;
 
-	return txdesc;
+	cookie = dmaengine_submit(txdesc);
+	ret = dma_submit_error(cookie);
+	if (ret) {
+		dmaengine_terminate_sync(dws->txchan);
+		return ret;
+	}
+
+	set_bit(TX_BUSY, &dws->dma_chan_busy);
+
+	return 0;
 }
 
 static inline bool dw_spi_dma_rx_busy(struct dw_spi *dws)
@@ -283,7 +316,7 @@
 
 static int dw_spi_dma_wait_rx_done(struct dw_spi *dws)
 {
-	int retry = WAIT_RETRIES;
+	int retry = SPI_WAIT_RETRIES;
 	struct spi_delay delay;
 	unsigned long ns, us;
 	u32 nents;
@@ -331,18 +364,12 @@
 	if (test_bit(TX_BUSY, &dws->dma_chan_busy))
 		return;
 
-	dw_writel(dws, DW_SPI_DMACR, 0);
 	complete(&dws->dma_completion);
 }
 
-static struct dma_async_tx_descriptor *dw_spi_dma_prepare_rx(struct dw_spi *dws,
-		struct spi_transfer *xfer)
+static int dw_spi_dma_config_rx(struct dw_spi *dws)
 {
 	struct dma_slave_config rxconf;
-	struct dma_async_tx_descriptor *rxdesc;
-
-	if (!xfer->rx_buf)
-		return NULL;
 
 	memset(&rxconf, 0, sizeof(rxconf));
 	rxconf.direction = DMA_DEV_TO_MEM;
@@ -352,50 +379,64 @@
 	rxconf.src_addr_width = dw_spi_dma_convert_width(dws->n_bytes);
 	rxconf.device_fc = false;
 
-	dmaengine_slave_config(dws->rxchan, &rxconf);
+	return dmaengine_slave_config(dws->rxchan, &rxconf);
+}
 
-	rxdesc = dmaengine_prep_slave_sg(dws->rxchan,
-				xfer->rx_sg.sgl,
-				xfer->rx_sg.nents,
-				DMA_DEV_TO_MEM,
-				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+static int dw_spi_dma_submit_rx(struct dw_spi *dws, struct scatterlist *sgl,
+				unsigned int nents)
+{
+	struct dma_async_tx_descriptor *rxdesc;
+	dma_cookie_t cookie;
+	int ret;
+
+	rxdesc = dmaengine_prep_slave_sg(dws->rxchan, sgl, nents,
+					 DMA_DEV_TO_MEM,
+					 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	if (!rxdesc)
-		return NULL;
+		return -ENOMEM;
 
 	rxdesc->callback = dw_spi_dma_rx_done;
 	rxdesc->callback_param = dws;
 
-	return rxdesc;
+	cookie = dmaengine_submit(rxdesc);
+	ret = dma_submit_error(cookie);
+	if (ret) {
+		dmaengine_terminate_sync(dws->rxchan);
+		return ret;
+	}
+
+	set_bit(RX_BUSY, &dws->dma_chan_busy);
+
+	return 0;
 }
 
 static int dw_spi_dma_setup(struct dw_spi *dws, struct spi_transfer *xfer)
 {
-	u16 imr = 0, dma_ctrl = 0;
+	u16 imr, dma_ctrl;
+	int ret;
 
-	/*
-	 * Having a Rx DMA channel serviced with higher priority than a Tx DMA
-	 * channel might not be enough to provide a well balanced DMA-based
-	 * SPI transfer interface. There might still be moments when the Tx DMA
-	 * channel is occasionally handled faster than the Rx DMA channel.
-	 * That in its turn will eventually cause the SPI Rx FIFO overflow if
-	 * SPI bus speed is high enough to fill the SPI Rx FIFO in before it's
-	 * cleared by the Rx DMA channel. In order to fix the problem the Tx
-	 * DMA activity is intentionally slowed down by limiting the SPI Tx
-	 * FIFO depth with a value twice bigger than the Tx burst length
-	 * calculated earlier by the dw_spi_dma_maxburst_init() method.
-	 */
-	dw_writel(dws, DW_SPI_DMARDLR, dws->rxburst - 1);
-	dw_writel(dws, DW_SPI_DMATDLR, dws->txburst);
+	if (!xfer->tx_buf)
+		return -EINVAL;
 
-	if (xfer->tx_buf)
-		dma_ctrl |= SPI_DMA_TDMAE;
+	/* Setup DMA channels */
+	ret = dw_spi_dma_config_tx(dws);
+	if (ret)
+		return ret;
+
+	if (xfer->rx_buf) {
+		ret = dw_spi_dma_config_rx(dws);
+		if (ret)
+			return ret;
+	}
+
+	/* Set the DMA handshaking interface */
+	dma_ctrl = SPI_DMA_TDMAE;
 	if (xfer->rx_buf)
 		dma_ctrl |= SPI_DMA_RDMAE;
 	dw_writel(dws, DW_SPI_DMACR, dma_ctrl);
 
 	/* Set the interrupt mask */
-	if (xfer->tx_buf)
-		imr |= SPI_INT_TXOI;
+	imr = SPI_INT_TXOI;
 	if (xfer->rx_buf)
 		imr |= SPI_INT_RXUI | SPI_INT_RXOI;
 	spi_umask_intr(dws, imr);
@@ -407,41 +448,166 @@
 	return 0;
 }
 
-static int dw_spi_dma_transfer(struct dw_spi *dws, struct spi_transfer *xfer)
+static int dw_spi_dma_transfer_all(struct dw_spi *dws,
+				   struct spi_transfer *xfer)
 {
-	struct dma_async_tx_descriptor *txdesc, *rxdesc;
 	int ret;
 
-	/* Prepare the TX dma transfer */
-	txdesc = dw_spi_dma_prepare_tx(dws, xfer);
+	/* Submit the DMA Tx transfer */
+	ret = dw_spi_dma_submit_tx(dws, xfer->tx_sg.sgl, xfer->tx_sg.nents);
+	if (ret)
+		goto err_clear_dmac;
 
-	/* Prepare the RX dma transfer */
-	rxdesc = dw_spi_dma_prepare_rx(dws, xfer);
+	/* Submit the DMA Rx transfer if required */
+	if (xfer->rx_buf) {
+		ret = dw_spi_dma_submit_rx(dws, xfer->rx_sg.sgl,
+					   xfer->rx_sg.nents);
+		if (ret)
+			goto err_clear_dmac;
 
-	/* rx must be started before tx due to spi instinct */
-	if (rxdesc) {
-		set_bit(RX_BUSY, &dws->dma_chan_busy);
-		dmaengine_submit(rxdesc);
+		/* rx must be started before tx due to spi instinct */
 		dma_async_issue_pending(dws->rxchan);
 	}
 
-	if (txdesc) {
-		set_bit(TX_BUSY, &dws->dma_chan_busy);
-		dmaengine_submit(txdesc);
+	dma_async_issue_pending(dws->txchan);
+
+	ret = dw_spi_dma_wait(dws, xfer->len, xfer->effective_speed_hz);
+
+err_clear_dmac:
+	dw_writel(dws, DW_SPI_DMACR, 0);
+
+	return ret;
+}
+
+/*
+ * In case if at least one of the requested DMA channels doesn't support the
+ * hardware accelerated SG list entries traverse, the DMA driver will most
+ * likely work that around by performing the IRQ-based SG list entries
+ * resubmission. That might and will cause a problem if the DMA Tx channel is
+ * recharged and re-executed before the Rx DMA channel. Due to
+ * non-deterministic IRQ-handler execution latency the DMA Tx channel will
+ * start pushing data to the SPI bus before the Rx DMA channel is even
+ * reinitialized with the next inbound SG list entry. By doing so the DMA Tx
+ * channel will implicitly start filling the DW APB SSI Rx FIFO up, which while
+ * the DMA Rx channel being recharged and re-executed will eventually be
+ * overflown.
+ *
+ * In order to solve the problem we have to feed the DMA engine with SG list
+ * entries one-by-one. It shall keep the DW APB SSI Tx and Rx FIFOs
+ * synchronized and prevent the Rx FIFO overflow. Since in general the tx_sg
+ * and rx_sg lists may have different number of entries of different lengths
+ * (though total length should match) let's virtually split the SG-lists to the
+ * set of DMA transfers, which length is a minimum of the ordered SG-entries
+ * lengths. An ASCII-sketch of the implemented algo is following:
+ *                  xfer->len
+ *                |___________|
+ * tx_sg list:    |___|____|__|
+ * rx_sg list:    |_|____|____|
+ * DMA transfers: |_|_|__|_|__|
+ *
+ * Note in order to have this workaround solving the denoted problem the DMA
+ * engine driver should properly initialize the max_sg_burst capability and set
+ * the DMA device max segment size parameter with maximum data block size the
+ * DMA engine supports.
+ */
+
+static int dw_spi_dma_transfer_one(struct dw_spi *dws,
+				   struct spi_transfer *xfer)
+{
+	struct scatterlist *tx_sg = NULL, *rx_sg = NULL, tx_tmp, rx_tmp;
+	unsigned int tx_len = 0, rx_len = 0;
+	unsigned int base, len;
+	int ret;
+
+	sg_init_table(&tx_tmp, 1);
+	sg_init_table(&rx_tmp, 1);
+
+	for (base = 0, len = 0; base < xfer->len; base += len) {
+		/* Fetch next Tx DMA data chunk */
+		if (!tx_len) {
+			tx_sg = !tx_sg ? &xfer->tx_sg.sgl[0] : sg_next(tx_sg);
+			sg_dma_address(&tx_tmp) = sg_dma_address(tx_sg);
+			tx_len = sg_dma_len(tx_sg);
+		}
+
+		/* Fetch next Rx DMA data chunk */
+		if (!rx_len) {
+			rx_sg = !rx_sg ? &xfer->rx_sg.sgl[0] : sg_next(rx_sg);
+			sg_dma_address(&rx_tmp) = sg_dma_address(rx_sg);
+			rx_len = sg_dma_len(rx_sg);
+		}
+
+		len = min(tx_len, rx_len);
+
+		sg_dma_len(&tx_tmp) = len;
+		sg_dma_len(&rx_tmp) = len;
+
+		/* Submit DMA Tx transfer */
+		ret = dw_spi_dma_submit_tx(dws, &tx_tmp, 1);
+		if (ret)
+			break;
+
+		/* Submit DMA Rx transfer */
+		ret = dw_spi_dma_submit_rx(dws, &rx_tmp, 1);
+		if (ret)
+			break;
+
+		/* Rx must be started before Tx due to SPI instinct */
+		dma_async_issue_pending(dws->rxchan);
+
 		dma_async_issue_pending(dws->txchan);
+
+		/*
+		 * Here we only need to wait for the DMA transfer to be
+		 * finished since SPI controller is kept enabled during the
+		 * procedure this loop implements and there is no risk to lose
+		 * data left in the Tx/Rx FIFOs.
+		 */
+		ret = dw_spi_dma_wait(dws, len, xfer->effective_speed_hz);
+		if (ret)
+			break;
+
+		reinit_completion(&dws->dma_completion);
+
+		sg_dma_address(&tx_tmp) += len;
+		sg_dma_address(&rx_tmp) += len;
+		tx_len -= len;
+		rx_len -= len;
 	}
 
-	ret = dw_spi_dma_wait(dws, xfer);
+	dw_writel(dws, DW_SPI_DMACR, 0);
+
+	return ret;
+}
+
+static int dw_spi_dma_transfer(struct dw_spi *dws, struct spi_transfer *xfer)
+{
+	unsigned int nents;
+	int ret;
+
+	nents = max(xfer->tx_sg.nents, xfer->rx_sg.nents);
+
+	/*
+	 * Execute normal DMA-based transfer (which submits the Rx and Tx SG
+	 * lists directly to the DMA engine at once) if either full hardware
+	 * accelerated SG list traverse is supported by both channels, or the
+	 * Tx-only SPI transfer is requested, or the DMA engine is capable to
+	 * handle both SG lists on hardware accelerated basis.
+	 */
+	if (!dws->dma_sg_burst || !xfer->rx_buf || nents <= dws->dma_sg_burst)
+		ret = dw_spi_dma_transfer_all(dws, xfer);
+	else
+		ret = dw_spi_dma_transfer_one(dws, xfer);
 	if (ret)
 		return ret;
 
-	if (txdesc && dws->master->cur_msg->status == -EINPROGRESS) {
+	if (dws->master->cur_msg->status == -EINPROGRESS) {
 		ret = dw_spi_dma_wait_tx_done(dws, xfer);
 		if (ret)
 			return ret;
 	}
 
-	if (rxdesc && dws->master->cur_msg->status == -EINPROGRESS)
+	if (xfer->rx_buf && dws->master->cur_msg->status == -EINPROGRESS)
 		ret = dw_spi_dma_wait_rx_done(dws);
 
 	return ret;
@@ -457,8 +623,6 @@
 		dmaengine_terminate_sync(dws->rxchan);
 		clear_bit(RX_BUSY, &dws->dma_chan_busy);
 	}
-
-	dw_writel(dws, DW_SPI_DMACR, 0);
 }
 
 static const struct dw_spi_dma_ops dw_spi_dma_mfld_ops = {
diff --git a/drivers/spi/spi-dw-mmio.c b/drivers/spi/spi-dw-mmio.c
index 403403d..d0cc5bf 100644
--- a/drivers/spi/spi-dw-mmio.c
+++ b/drivers/spi/spi-dw-mmio.c
@@ -45,16 +45,12 @@
 #define MSCC_SPI_MST_SW_MODE_SW_PIN_CTRL_MODE	BIT(13)
 #define MSCC_SPI_MST_SW_MODE_SW_SPI_CS(x)	(x << 5)
 
-/*
- * For Keem Bay, CTRLR0[31] is used to select controller mode.
- * 0: SSI is slave
- * 1: SSI is master
- */
-#define KEEMBAY_CTRLR0_SSIC_IS_MST		BIT(31)
+#define SPARX5_FORCE_ENA			0xa4
+#define SPARX5_FORCE_VAL			0xa8
 
 struct dw_spi_mscc {
 	struct regmap       *syscon;
-	void __iomem        *spi_mst;
+	void __iomem        *spi_mst; /* Not sparx5 */
 };
 
 /*
@@ -114,9 +110,6 @@
 	dwsmmio->dws.set_cs = dw_spi_mscc_set_cs;
 	dwsmmio->priv = dwsmscc;
 
-	/* Register hook to configure CTRLR0 */
-	dwsmmio->dws.update_cr0 = dw_spi_update_cr0;
-
 	return 0;
 }
 
@@ -134,13 +127,71 @@
 				JAGUAR2_IF_SI_OWNER_OFFSET);
 }
 
+/*
+ * The Designware SPI controller (referred to as master in the
+ * documentation) automatically deasserts chip select when the tx fifo
+ * is empty. The chip selects then needs to be driven by a CS override
+ * register. enable is an active low signal.
+ */
+static void dw_spi_sparx5_set_cs(struct spi_device *spi, bool enable)
+{
+	struct dw_spi *dws = spi_master_get_devdata(spi->master);
+	struct dw_spi_mmio *dwsmmio = container_of(dws, struct dw_spi_mmio, dws);
+	struct dw_spi_mscc *dwsmscc = dwsmmio->priv;
+	u8 cs = spi->chip_select;
+
+	if (!enable) {
+		/* CS override drive enable */
+		regmap_write(dwsmscc->syscon, SPARX5_FORCE_ENA, 1);
+		/* Now set CSx enabled */
+		regmap_write(dwsmscc->syscon, SPARX5_FORCE_VAL, ~BIT(cs));
+		/* Allow settle */
+		usleep_range(1, 5);
+	} else {
+		/* CS value */
+		regmap_write(dwsmscc->syscon, SPARX5_FORCE_VAL, ~0);
+		/* Allow settle */
+		usleep_range(1, 5);
+		/* CS override drive disable */
+		regmap_write(dwsmscc->syscon, SPARX5_FORCE_ENA, 0);
+	}
+
+	dw_spi_set_cs(spi, enable);
+}
+
+static int dw_spi_mscc_sparx5_init(struct platform_device *pdev,
+				   struct dw_spi_mmio *dwsmmio)
+{
+	const char *syscon_name = "microchip,sparx5-cpu-syscon";
+	struct device *dev = &pdev->dev;
+	struct dw_spi_mscc *dwsmscc;
+
+	if (!IS_ENABLED(CONFIG_SPI_MUX)) {
+		dev_err(dev, "This driver needs CONFIG_SPI_MUX\n");
+		return -EOPNOTSUPP;
+	}
+
+	dwsmscc = devm_kzalloc(dev, sizeof(*dwsmscc), GFP_KERNEL);
+	if (!dwsmscc)
+		return -ENOMEM;
+
+	dwsmscc->syscon =
+		syscon_regmap_lookup_by_compatible(syscon_name);
+	if (IS_ERR(dwsmscc->syscon)) {
+		dev_err(dev, "No syscon map %s\n", syscon_name);
+		return PTR_ERR(dwsmscc->syscon);
+	}
+
+	dwsmmio->dws.set_cs = dw_spi_sparx5_set_cs;
+	dwsmmio->priv = dwsmscc;
+
+	return 0;
+}
+
 static int dw_spi_alpine_init(struct platform_device *pdev,
 			      struct dw_spi_mmio *dwsmmio)
 {
-	dwsmmio->dws.cs_override = 1;
-
-	/* Register hook to configure CTRLR0 */
-	dwsmmio->dws.update_cr0 = dw_spi_update_cr0;
+	dwsmmio->dws.caps = DW_SPI_CAP_CS_OVERRIDE;
 
 	return 0;
 }
@@ -148,9 +199,6 @@
 static int dw_spi_dw_apb_init(struct platform_device *pdev,
 			      struct dw_spi_mmio *dwsmmio)
 {
-	/* Register hook to configure CTRLR0 */
-	dwsmmio->dws.update_cr0 = dw_spi_update_cr0;
-
 	dw_spi_dma_setup_generic(&dwsmmio->dws);
 
 	return 0;
@@ -159,28 +207,17 @@
 static int dw_spi_dwc_ssi_init(struct platform_device *pdev,
 			       struct dw_spi_mmio *dwsmmio)
 {
-	/* Register hook to configure CTRLR0 */
-	dwsmmio->dws.update_cr0 = dw_spi_update_cr0_v1_01a;
+	dwsmmio->dws.caps = DW_SPI_CAP_DWC_SSI;
 
 	dw_spi_dma_setup_generic(&dwsmmio->dws);
 
 	return 0;
 }
 
-static u32 dw_spi_update_cr0_keembay(struct spi_controller *master,
-				     struct spi_device *spi,
-				     struct spi_transfer *transfer)
-{
-	u32 cr0 = dw_spi_update_cr0_v1_01a(master, spi, transfer);
-
-	return cr0 | KEEMBAY_CTRLR0_SSIC_IS_MST;
-}
-
 static int dw_spi_keembay_init(struct platform_device *pdev,
 			       struct dw_spi_mmio *dwsmmio)
 {
-	/* Register hook to configure CTRLR0 */
-	dwsmmio->dws.update_cr0 = dw_spi_update_cr0_keembay;
+	dwsmmio->dws.caps = DW_SPI_CAP_KEEMBAY_MST | DW_SPI_CAP_DWC_SSI;
 
 	return 0;
 }
@@ -297,6 +334,7 @@
 	{ .compatible = "renesas,rzn1-spi", .data = dw_spi_dw_apb_init},
 	{ .compatible = "snps,dwc-ssi-1.01a", .data = dw_spi_dwc_ssi_init},
 	{ .compatible = "intel,keembay-ssi", .data = dw_spi_keembay_init},
+	{ .compatible = "microchip,sparx5-spi", dw_spi_mscc_sparx5_init},
 	{ /* end of table */}
 };
 MODULE_DEVICE_TABLE(of, dw_spi_mmio_of_match);
diff --git a/drivers/spi/spi-dw-pci.c b/drivers/spi/spi-dw-pci.c
index 2ea7380..8a91cd5 100644
--- a/drivers/spi/spi-dw-pci.c
+++ b/drivers/spi/spi-dw-pci.c
@@ -48,9 +48,6 @@
 
 	iounmap(clk_reg);
 
-	/* Register hook to configure CTRLR0 */
-	dws->update_cr0 = dw_spi_update_cr0;
-
 	dw_spi_dma_setup_mfld(dws);
 
 	return 0;
@@ -58,9 +55,6 @@
 
 static int spi_generic_init(struct dw_spi *dws)
 {
-	/* Register hook to configure CTRLR0 */
-	dws->update_cr0 = dw_spi_update_cr0;
-
 	dw_spi_dma_setup_generic(dws);
 
 	return 0;
@@ -127,18 +121,16 @@
 		if (desc->setup) {
 			ret = desc->setup(dws);
 			if (ret)
-				return ret;
+				goto err_free_irq_vectors;
 		}
 	} else {
-		pci_free_irq_vectors(pdev);
-		return -ENODEV;
+		ret = -ENODEV;
+		goto err_free_irq_vectors;
 	}
 
 	ret = dw_spi_add_host(&pdev->dev, dws);
-	if (ret) {
-		pci_free_irq_vectors(pdev);
-		return ret;
-	}
+	if (ret)
+		goto err_free_irq_vectors;
 
 	/* PCI hook and SPI hook use the same drv data */
 	pci_set_drvdata(pdev, dws);
@@ -152,6 +144,10 @@
 	pm_runtime_allow(&pdev->dev);
 
 	return 0;
+
+err_free_irq_vectors:
+	pci_free_irq_vectors(pdev);
+	return ret;
 }
 
 static void spi_pci_remove(struct pci_dev *pdev)
diff --git a/drivers/spi/spi-dw.h b/drivers/spi/spi-dw.h
index 151ba316..faf40cb 100644
--- a/drivers/spi/spi-dw.h
+++ b/drivers/spi/spi-dw.h
@@ -2,11 +2,13 @@
 #ifndef DW_SPI_HEADER_H
 #define DW_SPI_HEADER_H
 
+#include <linux/bits.h>
 #include <linux/completion.h>
 #include <linux/debugfs.h>
 #include <linux/irqreturn.h>
 #include <linux/io.h>
 #include <linux/scatterlist.h>
+#include <linux/spi/spi-mem.h>
 
 /* Register offsets */
 #define DW_SPI_CTRLR0			0x00
@@ -34,6 +36,7 @@
 #define DW_SPI_IDR			0x58
 #define DW_SPI_VERSION			0x5c
 #define DW_SPI_DR			0x60
+#define DW_SPI_RX_SAMPLE_DLY		0xf0
 #define DW_SPI_CS_OVERRIDE		0xf4
 
 /* Bit fields in CTRLR0 */
@@ -69,6 +72,16 @@
 #define DWC_SSI_CTRLR0_FRF_OFFSET	6
 #define DWC_SSI_CTRLR0_DFS_OFFSET	0
 
+/*
+ * For Keem Bay, CTRLR0[31] is used to select controller mode.
+ * 0: SSI is slave
+ * 1: SSI is master
+ */
+#define DWC_SSI_CTRLR0_KEEMBAY_MST	BIT(31)
+
+/* Bit fields in CTRLR1 */
+#define SPI_NDF_MASK			GENMASK(15, 0)
+
 /* Bit fields in SR, 7 bits */
 #define SR_MASK				0x7f		/* cover 7 bits */
 #define SR_BUSY				(1 << 0)
@@ -91,8 +104,12 @@
 #define SPI_DMA_RDMAE			(1 << 0)
 #define SPI_DMA_TDMAE			(1 << 1)
 
-/* TX RX interrupt level threshold, max can be 256 */
-#define SPI_INT_THRESHOLD		32
+#define SPI_WAIT_RETRIES		5
+#define SPI_BUF_SIZE \
+	(sizeof_field(struct spi_mem_op, cmd.opcode) + \
+	 sizeof_field(struct spi_mem_op, addr.val) + 256)
+#define SPI_GET_BYTE(_val, _idx) \
+	((_val) >> (BITS_PER_BYTE * (_idx)) & 0xff)
 
 enum dw_ssi_type {
 	SSI_MOTO_SPI = 0,
@@ -100,6 +117,19 @@
 	SSI_NS_MICROWIRE,
 };
 
+/* DW SPI capabilities */
+#define DW_SPI_CAP_CS_OVERRIDE		BIT(0)
+#define DW_SPI_CAP_KEEMBAY_MST		BIT(1)
+#define DW_SPI_CAP_DWC_SSI		BIT(2)
+
+/* Slave spi_transfer/spi_mem_op related */
+struct dw_spi_cfg {
+	u8 tmode;
+	u8 dfs;
+	u32 ndf;
+	u32 freq;
+};
+
 struct dw_spi;
 struct dw_spi_dma_ops {
 	int (*dma_init)(struct device *dev, struct dw_spi *dws);
@@ -113,39 +143,43 @@
 
 struct dw_spi {
 	struct spi_controller	*master;
-	enum dw_ssi_type	type;
 
 	void __iomem		*regs;
 	unsigned long		paddr;
 	int			irq;
 	u32			fifo_len;	/* depth of the FIFO buffer */
+	u32			max_mem_freq;	/* max mem-ops bus freq */
 	u32			max_freq;	/* max bus freq supported */
 
-	int			cs_override;
+	u32			caps;		/* DW SPI capabilities */
+
 	u32			reg_io_width;	/* DR I/O width in bytes */
 	u16			bus_num;
 	u16			num_cs;		/* supported slave numbers */
 	void (*set_cs)(struct spi_device *spi, bool enable);
-	u32 (*update_cr0)(struct spi_controller *master, struct spi_device *spi,
-			  struct spi_transfer *transfer);
 
 	/* Current message transfer state info */
-	size_t			len;
 	void			*tx;
-	void			*tx_end;
-	spinlock_t		buf_lock;
+	unsigned int		tx_len;
 	void			*rx;
-	void			*rx_end;
+	unsigned int		rx_len;
+	u8			buf[SPI_BUF_SIZE];
 	int			dma_mapped;
 	u8			n_bytes;	/* current is a 1/2 bytes op */
 	irqreturn_t		(*transfer_handler)(struct dw_spi *dws);
 	u32			current_freq;	/* frequency in hz */
+	u32			cur_rx_sample_dly;
+	u32			def_rx_sample_dly_ns;
+
+	/* Custom memory operations */
+	struct spi_controller_mem_ops mem_ops;
 
 	/* DMA info */
 	struct dma_chan		*txchan;
 	u32			txburst;
 	struct dma_chan		*rxchan;
 	u32			rxburst;
+	u32			dma_sg_burst;
 	unsigned long		dma_chan_busy;
 	dma_addr_t		dma_addr; /* phy address of the Data register */
 	const struct dw_spi_dma_ops *dma_ops;
@@ -162,29 +196,19 @@
 	return __raw_readl(dws->regs + offset);
 }
 
-static inline u16 dw_readw(struct dw_spi *dws, u32 offset)
-{
-	return __raw_readw(dws->regs + offset);
-}
-
 static inline void dw_writel(struct dw_spi *dws, u32 offset, u32 val)
 {
 	__raw_writel(val, dws->regs + offset);
 }
 
-static inline void dw_writew(struct dw_spi *dws, u32 offset, u16 val)
-{
-	__raw_writew(val, dws->regs + offset);
-}
-
 static inline u32 dw_read_io_reg(struct dw_spi *dws, u32 offset)
 {
 	switch (dws->reg_io_width) {
 	case 2:
-		return dw_readw(dws, offset);
+		return readw_relaxed(dws->regs + offset);
 	case 4:
 	default:
-		return dw_readl(dws, offset);
+		return readl_relaxed(dws->regs + offset);
 	}
 }
 
@@ -192,11 +216,11 @@
 {
 	switch (dws->reg_io_width) {
 	case 2:
-		dw_writew(dws, offset, val);
+		writew_relaxed(val, dws->regs + offset);
 		break;
 	case 4:
 	default:
-		dw_writel(dws, offset, val);
+		writel_relaxed(val, dws->regs + offset);
 		break;
 	}
 }
@@ -230,14 +254,16 @@
 }
 
 /*
- * This does disable the SPI controller, interrupts, and re-enable the
- * controller back. Transmit and receive FIFO buffers are cleared when the
- * device is disabled.
+ * This disables the SPI controller, interrupts, clears the interrupts status
+ * and CS, then re-enables the controller back. Transmit and receive FIFO
+ * buffers are cleared when the device is disabled.
  */
 static inline void spi_reset_chip(struct dw_spi *dws)
 {
 	spi_enable_chip(dws, 0);
 	spi_mask_intr(dws, 0xff);
+	dw_readl(dws, DW_SPI_ICR);
+	dw_writel(dws, DW_SPI_SER, 0);
 	spi_enable_chip(dws, 1);
 }
 
@@ -248,16 +274,13 @@
 }
 
 extern void dw_spi_set_cs(struct spi_device *spi, bool enable);
+extern void dw_spi_update_config(struct dw_spi *dws, struct spi_device *spi,
+				 struct dw_spi_cfg *cfg);
+extern int dw_spi_check_status(struct dw_spi *dws, bool raw);
 extern int dw_spi_add_host(struct device *dev, struct dw_spi *dws);
 extern void dw_spi_remove_host(struct dw_spi *dws);
 extern int dw_spi_suspend_host(struct dw_spi *dws);
 extern int dw_spi_resume_host(struct dw_spi *dws);
-extern u32 dw_spi_update_cr0(struct spi_controller *master,
-			     struct spi_device *spi,
-			     struct spi_transfer *transfer);
-extern u32 dw_spi_update_cr0_v1_01a(struct spi_controller *master,
-				    struct spi_device *spi,
-				    struct spi_transfer *transfer);
 
 #ifdef CONFIG_SPI_DW_DMA
 
diff --git a/drivers/spi/spi-fsi.c b/drivers/spi/spi-fsi.c
index 37a3e0f..8a440c7 100644
--- a/drivers/spi/spi-fsi.c
+++ b/drivers/spi/spi-fsi.c
@@ -12,6 +12,7 @@
 
 #define FSI_ENGID_SPI			0x23
 #define FSI_MBOX_ROOT_CTRL_8		0x2860
+#define  FSI_MBOX_ROOT_CTRL_8_SPI_MUX	 0xf0000000
 
 #define FSI2SPI_DATA0			0x00
 #define FSI2SPI_DATA1			0x04
@@ -24,11 +25,16 @@
 
 #define SPI_FSI_BASE			0x70000
 #define SPI_FSI_INIT_TIMEOUT_MS		1000
-#define SPI_FSI_MAX_TRANSFER_SIZE	2048
+#define SPI_FSI_MAX_XFR_SIZE		2048
+#define SPI_FSI_MAX_XFR_SIZE_RESTRICTED	32
 
 #define SPI_FSI_ERROR			0x0
 #define SPI_FSI_COUNTER_CFG		0x1
 #define  SPI_FSI_COUNTER_CFG_LOOPS(x)	 (((u64)(x) & 0xffULL) << 32)
+#define  SPI_FSI_COUNTER_CFG_N2_RX	 BIT_ULL(8)
+#define  SPI_FSI_COUNTER_CFG_N2_TX	 BIT_ULL(9)
+#define  SPI_FSI_COUNTER_CFG_N2_IMPLICIT BIT_ULL(10)
+#define  SPI_FSI_COUNTER_CFG_N2_RELOAD	 BIT_ULL(11)
 #define SPI_FSI_CFG1			0x2
 #define SPI_FSI_CLOCK_CFG		0x3
 #define  SPI_FSI_CLOCK_CFG_MM_ENABLE	 BIT_ULL(32)
@@ -61,7 +67,7 @@
 #define  SPI_FSI_STATUS_RDR_OVERRUN	 BIT_ULL(62)
 #define  SPI_FSI_STATUS_RDR_FULL	 BIT_ULL(63)
 #define  SPI_FSI_STATUS_ANY_ERROR	 \
-	(SPI_FSI_STATUS_ERROR | SPI_FSI_STATUS_TDR_UNDERRUN | \
+	(SPI_FSI_STATUS_ERROR | \
 	 SPI_FSI_STATUS_TDR_OVERRUN | SPI_FSI_STATUS_RDR_UNDERRUN | \
 	 SPI_FSI_STATUS_RDR_OVERRUN)
 #define SPI_FSI_PORT_CTRL		0x9
@@ -70,6 +76,8 @@
 	struct device *dev;	/* SPI controller device */
 	struct fsi_device *fsi;	/* FSI2SPI CFAM engine device */
 	u32 base;
+	size_t max_xfr_size;
+	bool restricted;
 };
 
 struct fsi_spi_sequence {
@@ -77,6 +85,26 @@
 	u64 data;
 };
 
+static int fsi_spi_check_mux(struct fsi_device *fsi, struct device *dev)
+{
+	int rc;
+	u32 root_ctrl_8;
+	__be32 root_ctrl_8_be;
+
+	rc = fsi_slave_read(fsi->slave, FSI_MBOX_ROOT_CTRL_8, &root_ctrl_8_be,
+			    sizeof(root_ctrl_8_be));
+	if (rc)
+		return rc;
+
+	root_ctrl_8 = be32_to_cpu(root_ctrl_8_be);
+	dev_dbg(dev, "Root control register 8: %08x\n", root_ctrl_8);
+	if ((root_ctrl_8 & FSI_MBOX_ROOT_CTRL_8_SPI_MUX) ==
+	     FSI_MBOX_ROOT_CTRL_8_SPI_MUX)
+		return 0;
+
+	return -ENOLINK;
+}
+
 static int fsi_spi_check_status(struct fsi_spi *ctx)
 {
 	int rc;
@@ -205,8 +233,12 @@
 	if (rc)
 		return rc;
 
-	return fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
-				 SPI_FSI_CLOCK_CFG_RESET2);
+	rc = fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
+			       SPI_FSI_CLOCK_CFG_RESET2);
+	if (rc)
+		return rc;
+
+	return fsi_spi_write_reg(ctx, SPI_FSI_STATUS, 0ULL);
 }
 
 static int fsi_spi_sequence_add(struct fsi_spi_sequence *seq, u8 val)
@@ -214,8 +246,8 @@
 	/*
 	 * Add the next byte of instruction to the 8-byte sequence register.
 	 * Then decrement the counter so that the next instruction will go in
-	 * the right place. Return the number of "slots" left in the sequence
-	 * register.
+	 * the right place. Return the index of the slot we just filled in the
+	 * sequence register.
 	 */
 	seq->data |= (u64)val << seq->bit;
 	seq->bit -= 8;
@@ -233,40 +265,71 @@
 				     struct fsi_spi_sequence *seq,
 				     struct spi_transfer *transfer)
 {
+	bool docfg = false;
 	int loops;
 	int idx;
 	int rc;
+	u8 val = 0;
 	u8 len = min(transfer->len, 8U);
 	u8 rem = transfer->len % len;
+	u64 cfg = 0ULL;
 
 	loops = transfer->len / len;
 
 	if (transfer->tx_buf) {
-		idx = fsi_spi_sequence_add(seq,
-					   SPI_FSI_SEQUENCE_SHIFT_OUT(len));
+		val = SPI_FSI_SEQUENCE_SHIFT_OUT(len);
+		idx = fsi_spi_sequence_add(seq, val);
+
 		if (rem)
 			rem = SPI_FSI_SEQUENCE_SHIFT_OUT(rem);
 	} else if (transfer->rx_buf) {
-		idx = fsi_spi_sequence_add(seq,
-					   SPI_FSI_SEQUENCE_SHIFT_IN(len));
+		val = SPI_FSI_SEQUENCE_SHIFT_IN(len);
+		idx = fsi_spi_sequence_add(seq, val);
+
 		if (rem)
 			rem = SPI_FSI_SEQUENCE_SHIFT_IN(rem);
 	} else {
 		return -EINVAL;
 	}
 
+	if (ctx->restricted) {
+		const int eidx = rem ? 5 : 6;
+
+		while (loops > 1 && idx <= eidx) {
+			idx = fsi_spi_sequence_add(seq, val);
+			loops--;
+			docfg = true;
+		}
+
+		if (loops > 1) {
+			dev_warn(ctx->dev, "No sequencer slots; aborting.\n");
+			return -EINVAL;
+		}
+	}
+
 	if (loops > 1) {
 		fsi_spi_sequence_add(seq, SPI_FSI_SEQUENCE_BRANCH(idx));
+		docfg = true;
+	}
 
-		if (rem)
-			fsi_spi_sequence_add(seq, rem);
+	if (docfg) {
+		cfg = SPI_FSI_COUNTER_CFG_LOOPS(loops - 1);
+		if (transfer->rx_buf)
+			cfg |= SPI_FSI_COUNTER_CFG_N2_RX |
+				SPI_FSI_COUNTER_CFG_N2_TX |
+				SPI_FSI_COUNTER_CFG_N2_IMPLICIT |
+				SPI_FSI_COUNTER_CFG_N2_RELOAD;
 
-		rc = fsi_spi_write_reg(ctx, SPI_FSI_COUNTER_CFG,
-				       SPI_FSI_COUNTER_CFG_LOOPS(loops - 1));
+		rc = fsi_spi_write_reg(ctx, SPI_FSI_COUNTER_CFG, cfg);
 		if (rc)
 			return rc;
+	} else {
+		fsi_spi_write_reg(ctx, SPI_FSI_COUNTER_CFG, 0ULL);
 	}
 
+	if (rem)
+		fsi_spi_sequence_add(seq, rem);
+
 	return 0;
 }
 
@@ -275,6 +338,7 @@
 {
 	int rc = 0;
 	u64 status = 0ULL;
+	u64 cfg = 0ULL;
 
 	if (transfer->tx_buf) {
 		int nb;
@@ -312,6 +376,16 @@
 		u64 in = 0ULL;
 		u8 *rx = transfer->rx_buf;
 
+		rc = fsi_spi_read_reg(ctx, SPI_FSI_COUNTER_CFG, &cfg);
+		if (rc)
+			return rc;
+
+		if (cfg & SPI_FSI_COUNTER_CFG_N2_IMPLICIT) {
+			rc = fsi_spi_write_reg(ctx, SPI_FSI_DATA_TX, 0);
+			if (rc)
+				return rc;
+		}
+
 		while (transfer->len > recv) {
 			do {
 				rc = fsi_spi_read_reg(ctx, SPI_FSI_STATUS,
@@ -350,7 +424,7 @@
 	u64 status = 0ULL;
 	u64 wanted_clock_cfg = SPI_FSI_CLOCK_CFG_ECC_DISABLE |
 		SPI_FSI_CLOCK_CFG_SCK_NO_DEL |
-		FIELD_PREP(SPI_FSI_CLOCK_CFG_SCK_DIV, 4);
+		FIELD_PREP(SPI_FSI_CLOCK_CFG_SCK_DIV, 19);
 
 	end = jiffies + msecs_to_jiffies(SPI_FSI_INIT_TIMEOUT_MS);
 	do {
@@ -396,18 +470,22 @@
 static int fsi_spi_transfer_one_message(struct spi_controller *ctlr,
 					struct spi_message *mesg)
 {
-	int rc = 0;
+	int rc;
 	u8 seq_slave = SPI_FSI_SEQUENCE_SEL_SLAVE(mesg->spi->chip_select + 1);
 	struct spi_transfer *transfer;
 	struct fsi_spi *ctx = spi_controller_get_devdata(ctlr);
 
+	rc = fsi_spi_check_mux(ctx->fsi, ctx->dev);
+	if (rc)
+		return rc;
+
 	list_for_each_entry(transfer, &mesg->transfers, transfer_list) {
 		struct fsi_spi_sequence seq;
 		struct spi_transfer *next = NULL;
 
 		/* Sequencer must do shift out (tx) first. */
 		if (!transfer->tx_buf ||
-		    transfer->len > SPI_FSI_MAX_TRANSFER_SIZE) {
+		    transfer->len > (ctx->max_xfr_size + 8)) {
 			rc = -EINVAL;
 			goto error;
 		}
@@ -431,7 +509,7 @@
 
 			/* Sequencer can only do shift in (rx) after tx. */
 			if (next->rx_buf) {
-				if (next->len > SPI_FSI_MAX_TRANSFER_SIZE) {
+				if (next->len > ctx->max_xfr_size) {
 					rc = -EINVAL;
 					goto error;
 				}
@@ -476,30 +554,21 @@
 
 static size_t fsi_spi_max_transfer_size(struct spi_device *spi)
 {
-	return SPI_FSI_MAX_TRANSFER_SIZE;
+	struct fsi_spi *ctx = spi_controller_get_devdata(spi->controller);
+
+	return ctx->max_xfr_size;
 }
 
 static int fsi_spi_probe(struct device *dev)
 {
 	int rc;
-	u32 root_ctrl_8;
 	struct device_node *np;
 	int num_controllers_registered = 0;
 	struct fsi_device *fsi = to_fsi_dev(dev);
 
-	/*
-	 * Check the SPI mux before attempting to probe. If the mux isn't set
-	 * then the SPI controllers can't access their slave devices.
-	 */
-	rc = fsi_slave_read(fsi->slave, FSI_MBOX_ROOT_CTRL_8, &root_ctrl_8,
-			    sizeof(root_ctrl_8));
+	rc = fsi_spi_check_mux(fsi, dev);
 	if (rc)
-		return rc;
-
-	if (!root_ctrl_8) {
-		dev_dbg(dev, "SPI mux not set, aborting probe.\n");
 		return -ENODEV;
-	}
 
 	for_each_available_child_of_node(dev->of_node, np) {
 		u32 base;
@@ -524,6 +593,14 @@
 		ctx->fsi = fsi;
 		ctx->base = base + SPI_FSI_BASE;
 
+		if (of_device_is_compatible(np, "ibm,fsi2spi-restricted")) {
+			ctx->restricted = true;
+			ctx->max_xfr_size = SPI_FSI_MAX_XFR_SIZE_RESTRICTED;
+		} else {
+			ctx->restricted = false;
+			ctx->max_xfr_size = SPI_FSI_MAX_XFR_SIZE;
+		}
+
 		rc = devm_spi_register_controller(dev, ctlr);
 		if (rc)
 			spi_controller_put(ctlr);
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 108a7d5..3967afa 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -53,7 +53,6 @@
 
 #define SPI_SR				0x2c
 #define SPI_SR_TCFQF			BIT(31)
-#define SPI_SR_EOQF			BIT(28)
 #define SPI_SR_TFUF			BIT(27)
 #define SPI_SR_TFFF			BIT(25)
 #define SPI_SR_CMDTCF			BIT(23)
@@ -62,7 +61,7 @@
 #define SPI_SR_TFIWF			BIT(18)
 #define SPI_SR_RFDF			BIT(17)
 #define SPI_SR_CMDFFF			BIT(16)
-#define SPI_SR_CLEAR			(SPI_SR_TCFQF | SPI_SR_EOQF | \
+#define SPI_SR_CLEAR			(SPI_SR_TCFQF | \
 					SPI_SR_TFUF | SPI_SR_TFFF | \
 					SPI_SR_CMDTCF | SPI_SR_SPEF | \
 					SPI_SR_RFOF | SPI_SR_TFIWF | \
@@ -75,7 +74,6 @@
 
 #define SPI_RSER			0x30
 #define SPI_RSER_TCFQE			BIT(31)
-#define SPI_RSER_EOQFE			BIT(28)
 #define SPI_RSER_CMDTCFE		BIT(23)
 
 #define SPI_PUSHR			0x34
@@ -114,7 +112,6 @@
 };
 
 enum dspi_trans_mode {
-	DSPI_EOQ_MODE = 0,
 	DSPI_XSPI_MODE,
 	DSPI_DMA_MODE,
 };
@@ -189,7 +186,7 @@
 		.fifo_size		= 4,
 	},
 	[MCF5441X] = {
-		.trans_mode		= DSPI_EOQ_MODE,
+		.trans_mode		= DSPI_DMA_MODE,
 		.max_clock_factor	= 8,
 		.fifo_size		= 16,
 	},
@@ -671,11 +668,6 @@
 	}
 }
 
-static void dspi_pushr_write(struct fsl_dspi *dspi)
-{
-	regmap_write(dspi->regmap, SPI_PUSHR, dspi_pop_tx_pushr(dspi));
-}
-
 static void dspi_pushr_cmd_write(struct fsl_dspi *dspi, u16 cmd)
 {
 	/*
@@ -735,21 +727,6 @@
 	}
 }
 
-static void dspi_eoq_fifo_write(struct fsl_dspi *dspi, int num_words)
-{
-	u16 xfer_cmd = dspi->tx_cmd;
-
-	/* Fill TX FIFO with as many transfers as possible */
-	while (num_words--) {
-		dspi->tx_cmd = xfer_cmd;
-		/* Request EOQF for last transfer in FIFO */
-		if (num_words == 0)
-			dspi->tx_cmd |= SPI_PUSHR_CMD_EOQ;
-		/* Write combined TX FIFO and CMD FIFO entry */
-		dspi_pushr_write(dspi);
-	}
-}
-
 static u32 dspi_popr_read(struct fsl_dspi *dspi)
 {
 	u32 rxdata = 0;
@@ -818,7 +795,7 @@
 	dspi->oper_word_size = DIV_ROUND_UP(dspi->oper_bits_per_word, 8);
 
 	/*
-	 * Update CTAR here (code is common for EOQ, XSPI and DMA modes).
+	 * Update CTAR here (code is common for XSPI and DMA modes).
 	 * We will update CTARE in the portion specific to XSPI, when we
 	 * also know the preload value (DTCP).
 	 */
@@ -862,10 +839,7 @@
 
 	spi_take_timestamp_pre(dspi->ctlr, xfer, dspi->progress, !dspi->irq);
 
-	if (dspi->devtype_data->trans_mode == DSPI_EOQ_MODE)
-		dspi_eoq_fifo_write(dspi, num_words);
-	else
-		dspi_xspi_fifo_write(dspi, num_words);
+	dspi_xspi_fifo_write(dspi, num_words);
 	/*
 	 * Everything after this point is in a potential race with the next
 	 * interrupt, so we must never use dspi->words_in_flight again since it
@@ -898,7 +872,7 @@
 		regmap_read(dspi->regmap, SPI_SR, &spi_sr);
 		regmap_write(dspi->regmap, SPI_SR, spi_sr);
 
-		if (spi_sr & (SPI_SR_EOQF | SPI_SR_CMDTCF))
+		if (spi_sr & SPI_SR_CMDTCF)
 			break;
 	} while (--tries);
 
@@ -916,7 +890,7 @@
 	regmap_read(dspi->regmap, SPI_SR, &spi_sr);
 	regmap_write(dspi->regmap, SPI_SR, spi_sr);
 
-	if (!(spi_sr & (SPI_SR_EOQF | SPI_SR_CMDTCF)))
+	if (!(spi_sr & SPI_SR_CMDTCF))
 		return IRQ_NONE;
 
 	if (dspi_rxtx(dspi) == 0)
@@ -1204,9 +1178,6 @@
 	regmap_write(dspi->regmap, SPI_SR, SPI_SR_CLEAR);
 
 	switch (dspi->devtype_data->trans_mode) {
-	case DSPI_EOQ_MODE:
-		regmap_write(dspi->regmap, SPI_RSER, SPI_RSER_EOQFE);
-		break;
 	case DSPI_XSPI_MODE:
 		regmap_write(dspi->regmap, SPI_RSER, SPI_RSER_CMDTCFE);
 		break;
@@ -1245,22 +1216,6 @@
 	return 0;
 }
 
-/*
- * EOQ mode will inevitably deassert its PCS signal on last word in a queue
- * (hardware limitation), so we need to inform the spi_device that larger
- * buffers than the FIFO size are going to have the chip select randomly
- * toggling, so it has a chance to adapt its message sizes.
- */
-static size_t dspi_max_message_size(struct spi_device *spi)
-{
-	struct fsl_dspi *dspi = spi_controller_get_devdata(spi->controller);
-
-	if (dspi->devtype_data->trans_mode == DSPI_EOQ_MODE)
-		return dspi->devtype_data->fifo_size;
-
-	return SIZE_MAX;
-}
-
 static int dspi_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
@@ -1289,7 +1244,6 @@
 
 	ctlr->setup = dspi_setup;
 	ctlr->transfer_one_message = dspi_transfer_one_message;
-	ctlr->max_message_size = dspi_max_message_size;
 	ctlr->dev.of_node = pdev->dev.of_node;
 
 	ctlr->cleanup = dspi_cleanup;
diff --git a/drivers/spi/spi-fsl-espi.c b/drivers/spi/spi-fsl-espi.c
index 6d148ab..cf2b947 100644
--- a/drivers/spi/spi-fsl-espi.c
+++ b/drivers/spi/spi-fsl-espi.c
@@ -731,7 +731,7 @@
 	if (ret < 0)
 		goto err_pm;
 
-	dev_info(dev, "at 0x%p (irq = %u)\n", espi->reg_base, irq);
+	dev_info(dev, "irq = %u\n", irq);
 
 	pm_runtime_mark_last_busy(dev);
 	pm_runtime_put_autosuspend(dev);
diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c
index 85a5c95..986b979 100644
--- a/drivers/spi/spi-fsl-lpspi.c
+++ b/drivers/spi/spi-fsl-lpspi.c
@@ -944,8 +944,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int fsl_lpspi_suspend(struct device *dev)
+static int __maybe_unused fsl_lpspi_suspend(struct device *dev)
 {
 	int ret;
 
@@ -954,7 +953,7 @@
 	return ret;
 }
 
-static int fsl_lpspi_resume(struct device *dev)
+static int __maybe_unused fsl_lpspi_resume(struct device *dev)
 {
 	int ret;
 
@@ -968,7 +967,6 @@
 
 	return 0;
 }
-#endif /* CONFIG_PM_SLEEP */
 
 static const struct dev_pm_ops fsl_lpspi_pm_ops = {
 	SET_RUNTIME_PM_OPS(fsl_lpspi_runtime_suspend,
diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
index 80cea5c..25810a7 100644
--- a/drivers/spi/spi-geni-qcom.c
+++ b/drivers/spi/spi-geni-qcom.c
@@ -290,6 +290,7 @@
 {
 	struct geni_se *se = &mas->se;
 	unsigned int proto, major, minor, ver;
+	u32 spi_tx_cfg;
 
 	pm_runtime_get_sync(mas->dev);
 
@@ -308,7 +309,7 @@
 	 * Hardware programming guide suggests to configure
 	 * RX FIFO RFR level to fifo_depth-2.
 	 */
-	geni_se_init(se, mas->tx_fifo_depth / 2, mas->tx_fifo_depth - 2);
+	geni_se_init(se, mas->tx_fifo_depth - 3, mas->tx_fifo_depth - 2);
 	/* Transmit an entire FIFO worth of data per IRQ */
 	mas->tx_wm = 1;
 	ver = geni_se_get_qup_hw_version(se);
@@ -322,99 +323,13 @@
 
 	geni_se_select_mode(se, GENI_SE_FIFO);
 
-	pm_runtime_put(mas->dev);
-	return 0;
-}
-
-static void setup_fifo_xfer(struct spi_transfer *xfer,
-				struct spi_geni_master *mas,
-				u16 mode, struct spi_master *spi)
-{
-	u32 m_cmd = 0;
-	u32 spi_tx_cfg, len;
-	struct geni_se *se = &mas->se;
-	int ret;
-
-	/*
-	 * Ensure that our interrupt handler isn't still running from some
-	 * prior command before we start messing with the hardware behind
-	 * its back.  We don't need to _keep_ the lock here since we're only
-	 * worried about racing with out interrupt handler.  The SPI core
-	 * already handles making sure that we're not trying to do two
-	 * transfers at once or setting a chip select and doing a transfer
-	 * concurrently.
-	 *
-	 * NOTE: we actually _can't_ hold the lock here because possibly we
-	 * might call clk_set_rate() which needs to be able to sleep.
-	 */
-	spin_lock_irq(&mas->lock);
-	spin_unlock_irq(&mas->lock);
-
+	/* We always control CS manually */
 	spi_tx_cfg = readl(se->base + SE_SPI_TRANS_CFG);
-	if (xfer->bits_per_word != mas->cur_bits_per_word) {
-		spi_setup_word_len(mas, mode, xfer->bits_per_word);
-		mas->cur_bits_per_word = xfer->bits_per_word;
-	}
-
-	/* Speed and bits per word can be overridden per transfer */
-	ret = geni_spi_set_clock_and_bw(mas, xfer->speed_hz);
-	if (ret)
-		return;
-
-	mas->tx_rem_bytes = 0;
-	mas->rx_rem_bytes = 0;
-
 	spi_tx_cfg &= ~CS_TOGGLE;
-
-	if (!(mas->cur_bits_per_word % MIN_WORD_LEN))
-		len = xfer->len * BITS_PER_BYTE / mas->cur_bits_per_word;
-	else
-		len = xfer->len / (mas->cur_bits_per_word / BITS_PER_BYTE + 1);
-	len &= TRANS_LEN_MSK;
-
-	mas->cur_xfer = xfer;
-	if (xfer->tx_buf) {
-		m_cmd |= SPI_TX_ONLY;
-		mas->tx_rem_bytes = xfer->len;
-		writel(len, se->base + SE_SPI_TX_TRANS_LEN);
-	}
-
-	if (xfer->rx_buf) {
-		m_cmd |= SPI_RX_ONLY;
-		writel(len, se->base + SE_SPI_RX_TRANS_LEN);
-		mas->rx_rem_bytes = xfer->len;
-	}
 	writel(spi_tx_cfg, se->base + SE_SPI_TRANS_CFG);
 
-	/*
-	 * Lock around right before we start the transfer since our
-	 * interrupt could come in at any time now.
-	 */
-	spin_lock_irq(&mas->lock);
-	geni_se_setup_m_cmd(se, m_cmd, FRAGMENTATION);
-
-	/*
-	 * TX_WATERMARK_REG should be set after SPI configuration and
-	 * setting up GENI SE engine, as driver starts data transfer
-	 * for the watermark interrupt.
-	 */
-	if (m_cmd & SPI_TX_ONLY)
-		writel(mas->tx_wm, se->base + SE_GENI_TX_WATERMARK_REG);
-	spin_unlock_irq(&mas->lock);
-}
-
-static int spi_geni_transfer_one(struct spi_master *spi,
-				struct spi_device *slv,
-				struct spi_transfer *xfer)
-{
-	struct spi_geni_master *mas = spi_master_get_devdata(spi);
-
-	/* Terminate and return success for 0 byte length transfer */
-	if (!xfer->len)
-		return 0;
-
-	setup_fifo_xfer(xfer, mas, slv->mode, spi);
-	return 1;
+	pm_runtime_put(mas->dev);
+	return 0;
 }
 
 static unsigned int geni_byte_per_fifo_word(struct spi_geni_master *mas)
@@ -431,7 +346,7 @@
 	return mas->fifo_width_bits / BITS_PER_BYTE;
 }
 
-static void geni_spi_handle_tx(struct spi_geni_master *mas)
+static bool geni_spi_handle_tx(struct spi_geni_master *mas)
 {
 	struct geni_se *se = &mas->se;
 	unsigned int max_bytes;
@@ -456,8 +371,11 @@
 		iowrite32_rep(se->base + SE_GENI_TX_FIFOn, &fifo_word, 1);
 	}
 	mas->tx_rem_bytes -= max_bytes;
-	if (!mas->tx_rem_bytes)
+	if (!mas->tx_rem_bytes) {
 		writel(0, se->base + SE_GENI_TX_WATERMARK_REG);
+		return false;
+	}
+	return true;
 }
 
 static void geni_spi_handle_rx(struct spi_geni_master *mas)
@@ -496,6 +414,95 @@
 	mas->rx_rem_bytes -= rx_bytes;
 }
 
+static void setup_fifo_xfer(struct spi_transfer *xfer,
+				struct spi_geni_master *mas,
+				u16 mode, struct spi_master *spi)
+{
+	u32 m_cmd = 0;
+	u32 len;
+	struct geni_se *se = &mas->se;
+	int ret;
+
+	/*
+	 * Ensure that our interrupt handler isn't still running from some
+	 * prior command before we start messing with the hardware behind
+	 * its back.  We don't need to _keep_ the lock here since we're only
+	 * worried about racing with out interrupt handler.  The SPI core
+	 * already handles making sure that we're not trying to do two
+	 * transfers at once or setting a chip select and doing a transfer
+	 * concurrently.
+	 *
+	 * NOTE: we actually _can't_ hold the lock here because possibly we
+	 * might call clk_set_rate() which needs to be able to sleep.
+	 */
+	spin_lock_irq(&mas->lock);
+	spin_unlock_irq(&mas->lock);
+
+	if (xfer->bits_per_word != mas->cur_bits_per_word) {
+		spi_setup_word_len(mas, mode, xfer->bits_per_word);
+		mas->cur_bits_per_word = xfer->bits_per_word;
+	}
+
+	/* Speed and bits per word can be overridden per transfer */
+	ret = geni_spi_set_clock_and_bw(mas, xfer->speed_hz);
+	if (ret)
+		return;
+
+	mas->tx_rem_bytes = 0;
+	mas->rx_rem_bytes = 0;
+
+	if (!(mas->cur_bits_per_word % MIN_WORD_LEN))
+		len = xfer->len * BITS_PER_BYTE / mas->cur_bits_per_word;
+	else
+		len = xfer->len / (mas->cur_bits_per_word / BITS_PER_BYTE + 1);
+	len &= TRANS_LEN_MSK;
+
+	mas->cur_xfer = xfer;
+	if (xfer->tx_buf) {
+		m_cmd |= SPI_TX_ONLY;
+		mas->tx_rem_bytes = xfer->len;
+		writel(len, se->base + SE_SPI_TX_TRANS_LEN);
+	}
+
+	if (xfer->rx_buf) {
+		m_cmd |= SPI_RX_ONLY;
+		writel(len, se->base + SE_SPI_RX_TRANS_LEN);
+		mas->rx_rem_bytes = xfer->len;
+	}
+
+	/*
+	 * Lock around right before we start the transfer since our
+	 * interrupt could come in at any time now.
+	 */
+	spin_lock_irq(&mas->lock);
+	geni_se_setup_m_cmd(se, m_cmd, FRAGMENTATION);
+
+	/*
+	 * TX_WATERMARK_REG should be set after SPI configuration and
+	 * setting up GENI SE engine, as driver starts data transfer
+	 * for the watermark interrupt.
+	 */
+	if (m_cmd & SPI_TX_ONLY) {
+		if (geni_spi_handle_tx(mas))
+			writel(mas->tx_wm, se->base + SE_GENI_TX_WATERMARK_REG);
+	}
+	spin_unlock_irq(&mas->lock);
+}
+
+static int spi_geni_transfer_one(struct spi_master *spi,
+				struct spi_device *slv,
+				struct spi_transfer *xfer)
+{
+	struct spi_geni_master *mas = spi_master_get_devdata(spi);
+
+	/* Terminate and return success for 0 byte length transfer */
+	if (!xfer->len)
+		return 0;
+
+	setup_fifo_xfer(xfer, mas, slv->mode, spi);
+	return 1;
+}
+
 static irqreturn_t geni_spi_isr(int irq, void *data)
 {
 	struct spi_master *spi = data;
@@ -613,11 +620,9 @@
 		return PTR_ERR(mas->se.opp_table);
 	/* OPP table is optional */
 	ret = dev_pm_opp_of_add_table(&pdev->dev);
-	if (!ret) {
-		mas->se.has_opp_table = true;
-	} else if (ret != -ENODEV) {
+	if (ret && ret != -ENODEV) {
 		dev_err(&pdev->dev, "invalid OPP table in device tree\n");
-		return ret;
+		goto put_clkname;
 	}
 
 	spi->bus_num = -1;
@@ -669,8 +674,8 @@
 spi_geni_probe_runtime_disable:
 	pm_runtime_disable(dev);
 	spi_master_put(spi);
-	if (mas->se.has_opp_table)
-		dev_pm_opp_of_remove_table(&pdev->dev);
+	dev_pm_opp_of_remove_table(&pdev->dev);
+put_clkname:
 	dev_pm_opp_put_clkname(mas->se.opp_table);
 	return ret;
 }
@@ -685,8 +690,7 @@
 
 	free_irq(mas->irq, spi);
 	pm_runtime_disable(&pdev->dev);
-	if (mas->se.has_opp_table)
-		dev_pm_opp_of_remove_table(&pdev->dev);
+	dev_pm_opp_of_remove_table(&pdev->dev);
 	dev_pm_opp_put_clkname(mas->se.opp_table);
 	return 0;
 }
diff --git a/drivers/spi/spi-hisi-sfc-v3xx.c b/drivers/spi/spi-hisi-sfc-v3xx.c
index 64a18d0..4650b48 100644
--- a/drivers/spi/spi-hisi-sfc-v3xx.c
+++ b/drivers/spi/spi-hisi-sfc-v3xx.c
@@ -7,7 +7,9 @@
 
 #include <linux/acpi.h>
 #include <linux/bitops.h>
+#include <linux/completion.h>
 #include <linux/dmi.h>
+#include <linux/interrupt.h>
 #include <linux/iopoll.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
@@ -17,18 +19,11 @@
 
 #define HISI_SFC_V3XX_VERSION (0x1f8)
 
-#define HISI_SFC_V3XX_INT_STAT (0x120)
-#define HISI_SFC_V3XX_INT_STAT_PP_ERR BIT(2)
-#define HISI_SFC_V3XX_INT_STAT_ADDR_IACCES BIT(5)
+#define HISI_SFC_V3XX_RAW_INT_STAT (0x120)
+#define HISI_SFC_V3XX_INT_STAT (0x124)
+#define HISI_SFC_V3XX_INT_MASK (0x128)
 #define HISI_SFC_V3XX_INT_CLR (0x12c)
-#define HISI_SFC_V3XX_INT_CLR_CLEAR (0xff)
 #define HISI_SFC_V3XX_CMD_CFG (0x300)
-#define HISI_SFC_V3XX_CMD_CFG_DUAL_IN_DUAL_OUT (1 << 17)
-#define HISI_SFC_V3XX_CMD_CFG_DUAL_IO (2 << 17)
-#define HISI_SFC_V3XX_CMD_CFG_FULL_DIO (3 << 17)
-#define HISI_SFC_V3XX_CMD_CFG_QUAD_IN_QUAD_OUT (5 << 17)
-#define HISI_SFC_V3XX_CMD_CFG_QUAD_IO (6 << 17)
-#define HISI_SFC_V3XX_CMD_CFG_FULL_QIO (7 << 17)
 #define HISI_SFC_V3XX_CMD_CFG_DATA_CNT_OFF 9
 #define HISI_SFC_V3XX_CMD_CFG_RW_MSK BIT(8)
 #define HISI_SFC_V3XX_CMD_CFG_DATA_EN_MSK BIT(7)
@@ -40,12 +35,99 @@
 #define HISI_SFC_V3XX_CMD_ADDR (0x30c)
 #define HISI_SFC_V3XX_CMD_DATABUF0 (0x400)
 
+/* Common definition of interrupt bit masks */
+#define HISI_SFC_V3XX_INT_MASK_ALL (0x1ff)	/* all the masks */
+#define HISI_SFC_V3XX_INT_MASK_CPLT BIT(0)	/* command execution complete */
+#define HISI_SFC_V3XX_INT_MASK_PP_ERR BIT(2)	/* page progrom error */
+#define HISI_SFC_V3XX_INT_MASK_IACCES BIT(5)	/* error visiting inaccessible/
+						 * protected address
+						 */
+
+/* IO Mode definition in HISI_SFC_V3XX_CMD_CFG */
+#define HISI_SFC_V3XX_STD (0 << 17)
+#define HISI_SFC_V3XX_DIDO (1 << 17)
+#define HISI_SFC_V3XX_DIO (2 << 17)
+#define HISI_SFC_V3XX_FULL_DIO (3 << 17)
+#define HISI_SFC_V3XX_QIQO (5 << 17)
+#define HISI_SFC_V3XX_QIO (6 << 17)
+#define HISI_SFC_V3XX_FULL_QIO (7 << 17)
+
+/*
+ * The IO modes lookup table. hisi_sfc_v3xx_io_modes[(z - 1) / 2][y / 2][x / 2]
+ * stands for x-y-z mode, as described in SFDP terminology. -EIO indicates
+ * an invalid mode.
+ */
+static const int hisi_sfc_v3xx_io_modes[2][3][3] = {
+	{
+		{ HISI_SFC_V3XX_DIDO, HISI_SFC_V3XX_DIDO, HISI_SFC_V3XX_DIDO },
+		{ HISI_SFC_V3XX_DIO, HISI_SFC_V3XX_FULL_DIO, -EIO },
+		{ -EIO, -EIO, -EIO },
+	},
+	{
+		{ HISI_SFC_V3XX_QIQO, HISI_SFC_V3XX_QIQO, HISI_SFC_V3XX_QIQO },
+		{ -EIO, -EIO, -EIO },
+		{ HISI_SFC_V3XX_QIO, -EIO, HISI_SFC_V3XX_FULL_QIO },
+	},
+};
+
 struct hisi_sfc_v3xx_host {
 	struct device *dev;
 	void __iomem *regbase;
 	int max_cmd_dword;
+	struct completion *completion;
+	int irq;
 };
 
+static void hisi_sfc_v3xx_disable_int(struct hisi_sfc_v3xx_host *host)
+{
+	writel(0, host->regbase + HISI_SFC_V3XX_INT_MASK);
+}
+
+static void hisi_sfc_v3xx_enable_int(struct hisi_sfc_v3xx_host *host)
+{
+	writel(HISI_SFC_V3XX_INT_MASK_ALL, host->regbase + HISI_SFC_V3XX_INT_MASK);
+}
+
+static void hisi_sfc_v3xx_clear_int(struct hisi_sfc_v3xx_host *host)
+{
+	writel(HISI_SFC_V3XX_INT_MASK_ALL, host->regbase + HISI_SFC_V3XX_INT_CLR);
+}
+
+/*
+ * The interrupt status register indicates whether an error occurs
+ * after per operation. Check it, and clear the interrupts for
+ * next time judgement.
+ */
+static int hisi_sfc_v3xx_handle_completion(struct hisi_sfc_v3xx_host *host)
+{
+	u32 reg;
+
+	reg = readl(host->regbase + HISI_SFC_V3XX_RAW_INT_STAT);
+	hisi_sfc_v3xx_clear_int(host);
+
+	if (reg & HISI_SFC_V3XX_INT_MASK_IACCES) {
+		dev_err(host->dev, "fail to access protected address\n");
+		return -EIO;
+	}
+
+	if (reg & HISI_SFC_V3XX_INT_MASK_PP_ERR) {
+		dev_err(host->dev, "page program operation failed\n");
+		return -EIO;
+	}
+
+	/*
+	 * The other bits of the interrupt registers is not currently
+	 * used and probably not be triggered in this driver. When it
+	 * happens, we regard it as an unsupported error here.
+	 */
+	if (!(reg & HISI_SFC_V3XX_INT_MASK_CPLT)) {
+		dev_err(host->dev, "unsupported error occurred, status=0x%x\n", reg);
+		return -EIO;
+	}
+
+	return 0;
+}
+
 #define HISI_SFC_V3XX_WAIT_TIMEOUT_US		1000000
 #define HISI_SFC_V3XX_WAIT_POLL_INTERVAL_US	10
 
@@ -80,6 +162,20 @@
 }
 
 /*
+ * The controller only supports Standard SPI mode, Duall mode and
+ * Quad mode. Double sanitize the ops here to avoid OOB access.
+ */
+static bool hisi_sfc_v3xx_supports_op(struct spi_mem *mem,
+				      const struct spi_mem_op *op)
+{
+	if (op->data.buswidth > 4 || op->dummy.buswidth > 4 ||
+	    op->addr.buswidth > 4 || op->cmd.buswidth > 4)
+		return false;
+
+	return spi_mem_default_supports_op(mem, op);
+}
+
+/*
  * memcpy_{to,from}io doesn't gurantee 32b accesses - which we require for the
  * DATABUF registers -so use __io{read,write}32_copy when possible. For
  * trailing bytes, copy them byte-by-byte from the DATABUF register, as we
@@ -163,61 +259,36 @@
 	}
 }
 
-static int hisi_sfc_v3xx_generic_exec_op(struct hisi_sfc_v3xx_host *host,
-					 const struct spi_mem_op *op,
-					 u8 chip_select)
+static int hisi_sfc_v3xx_start_bus(struct hisi_sfc_v3xx_host *host,
+				   const struct spi_mem_op *op,
+				   u8 chip_select)
 {
-	int ret, len = op->data.nbytes;
-	u32 int_stat, config = 0;
+	int len = op->data.nbytes, buswidth_mode;
+	u32 config = 0;
 
 	if (op->addr.nbytes)
 		config |= HISI_SFC_V3XX_CMD_CFG_ADDR_EN_MSK;
 
-	switch (op->data.buswidth) {
-	case 0 ... 1:
-		break;
-	case 2:
-		if (op->addr.buswidth <= 1) {
-			config |= HISI_SFC_V3XX_CMD_CFG_DUAL_IN_DUAL_OUT;
-		} else if (op->addr.buswidth == 2) {
-			if (op->cmd.buswidth <= 1) {
-				config |= HISI_SFC_V3XX_CMD_CFG_DUAL_IO;
-			} else if (op->cmd.buswidth == 2) {
-				config |= HISI_SFC_V3XX_CMD_CFG_FULL_DIO;
-			} else {
-				return -EIO;
-			}
-		} else {
-			return -EIO;
-		}
-		break;
-	case 4:
-		if (op->addr.buswidth <= 1) {
-			config |= HISI_SFC_V3XX_CMD_CFG_QUAD_IN_QUAD_OUT;
-		} else if (op->addr.buswidth == 4) {
-			if (op->cmd.buswidth <= 1) {
-				config |= HISI_SFC_V3XX_CMD_CFG_QUAD_IO;
-			} else if (op->cmd.buswidth == 4) {
-				config |= HISI_SFC_V3XX_CMD_CFG_FULL_QIO;
-			} else {
-				return -EIO;
-			}
-		} else {
-			return -EIO;
-		}
-		break;
-	default:
-		return -EOPNOTSUPP;
+	if (op->data.buswidth == 0 || op->data.buswidth == 1) {
+		buswidth_mode = HISI_SFC_V3XX_STD;
+	} else {
+		int data_idx, addr_idx, cmd_idx;
+
+		data_idx = (op->data.buswidth - 1) / 2;
+		addr_idx = op->addr.buswidth / 2;
+		cmd_idx = op->cmd.buswidth / 2;
+		buswidth_mode = hisi_sfc_v3xx_io_modes[data_idx][addr_idx][cmd_idx];
 	}
+	if (buswidth_mode < 0)
+		return buswidth_mode;
+	config |= buswidth_mode;
 
 	if (op->data.dir != SPI_MEM_NO_DATA) {
 		config |= (len - 1) << HISI_SFC_V3XX_CMD_CFG_DATA_CNT_OFF;
 		config |= HISI_SFC_V3XX_CMD_CFG_DATA_EN_MSK;
 	}
 
-	if (op->data.dir == SPI_MEM_DATA_OUT)
-		hisi_sfc_v3xx_write_databuf(host, op->data.buf.out, len);
-	else if (op->data.dir == SPI_MEM_DATA_IN)
+	if (op->data.dir == SPI_MEM_DATA_IN)
 		config |= HISI_SFC_V3XX_CMD_CFG_RW_MSK;
 
 	config |= op->dummy.nbytes << HISI_SFC_V3XX_CMD_CFG_DUMMY_CNT_OFF |
@@ -229,31 +300,46 @@
 
 	writel(config, host->regbase + HISI_SFC_V3XX_CMD_CFG);
 
-	ret = hisi_sfc_v3xx_wait_cmd_idle(host);
+	return 0;
+}
+
+static int hisi_sfc_v3xx_generic_exec_op(struct hisi_sfc_v3xx_host *host,
+					 const struct spi_mem_op *op,
+					 u8 chip_select)
+{
+	DECLARE_COMPLETION_ONSTACK(done);
+	int ret;
+
+	if (host->irq) {
+		host->completion = &done;
+		hisi_sfc_v3xx_enable_int(host);
+	}
+
+	if (op->data.dir == SPI_MEM_DATA_OUT)
+		hisi_sfc_v3xx_write_databuf(host, op->data.buf.out, op->data.nbytes);
+
+	ret = hisi_sfc_v3xx_start_bus(host, op, chip_select);
 	if (ret)
 		return ret;
 
-	/*
-	 * The interrupt status register indicates whether an error occurs
-	 * after per operation. Check it, and clear the interrupts for
-	 * next time judgement.
-	 */
-	int_stat = readl(host->regbase + HISI_SFC_V3XX_INT_STAT);
-	writel(HISI_SFC_V3XX_INT_CLR_CLEAR,
-	       host->regbase + HISI_SFC_V3XX_INT_CLR);
+	if (host->irq) {
+		ret = wait_for_completion_timeout(host->completion,
+						  usecs_to_jiffies(HISI_SFC_V3XX_WAIT_TIMEOUT_US));
+		if (!ret)
+			ret = -ETIMEDOUT;
+		else
+			ret = 0;
 
-	if (int_stat & HISI_SFC_V3XX_INT_STAT_ADDR_IACCES) {
-		dev_err(host->dev, "fail to access protected address\n");
-		return -EIO;
+		hisi_sfc_v3xx_disable_int(host);
+		host->completion = NULL;
+	} else {
+		ret = hisi_sfc_v3xx_wait_cmd_idle(host);
 	}
-
-	if (int_stat & HISI_SFC_V3XX_INT_STAT_PP_ERR) {
-		dev_err(host->dev, "page program operation failed\n");
+	if (hisi_sfc_v3xx_handle_completion(host) || ret)
 		return -EIO;
-	}
 
 	if (op->data.dir == SPI_MEM_DATA_IN)
-		hisi_sfc_v3xx_read_databuf(host, op->data.buf.in, len);
+		hisi_sfc_v3xx_read_databuf(host, op->data.buf.in, op->data.nbytes);
 
 	return 0;
 }
@@ -272,9 +358,21 @@
 
 static const struct spi_controller_mem_ops hisi_sfc_v3xx_mem_ops = {
 	.adjust_op_size = hisi_sfc_v3xx_adjust_op_size,
+	.supports_op = hisi_sfc_v3xx_supports_op,
 	.exec_op = hisi_sfc_v3xx_exec_op,
 };
 
+static irqreturn_t hisi_sfc_v3xx_isr(int irq, void *data)
+{
+	struct hisi_sfc_v3xx_host *host = data;
+
+	hisi_sfc_v3xx_disable_int(host);
+
+	complete(host->completion);
+
+	return IRQ_HANDLED;
+}
+
 static int hisi_sfc_v3xx_buswidth_override_bits;
 
 /*
@@ -341,6 +439,26 @@
 		goto err_put_master;
 	}
 
+	host->irq = platform_get_irq_optional(pdev, 0);
+	if (host->irq == -EPROBE_DEFER) {
+		ret = -EPROBE_DEFER;
+		goto err_put_master;
+	}
+
+	hisi_sfc_v3xx_disable_int(host);
+
+	if (host->irq > 0) {
+		ret = devm_request_irq(dev, host->irq, hisi_sfc_v3xx_isr, 0,
+				       "hisi-sfc-v3xx", host);
+
+		if (ret) {
+			dev_err(dev, "failed to request irq%d, ret = %d\n", host->irq, ret);
+			host->irq = 0;
+		}
+	} else {
+		host->irq = 0;
+	}
+
 	ctlr->bus_num = -1;
 	ctlr->num_chipselect = 1;
 	ctlr->mem_ops = &hisi_sfc_v3xx_mem_ops;
@@ -360,7 +478,8 @@
 	if (ret)
 		goto err_put_master;
 
-	dev_info(&pdev->dev, "hw version 0x%x\n", version);
+	dev_info(&pdev->dev, "hw version 0x%x, %s mode.\n",
+		 version, host->irq ? "irq" : "polling");
 
 	return 0;
 
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index 38a5f13..060b1f5 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -1503,6 +1503,8 @@
 {
 	struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
 
+	transfer->effective_speed_hz = spi_imx->spi_bus_clk;
+
 	/* flush rxfifo before transfer */
 	while (spi_imx->devtype_data->rx_available(spi_imx))
 		readl(spi_imx->base + MXC_CSPIRXDATA);
@@ -1695,7 +1697,7 @@
 			goto out_runtime_pm_put;
 
 		if (ret < 0)
-			dev_err(&pdev->dev, "dma setup error %d, use pio\n",
+			dev_dbg(&pdev->dev, "dma setup error %d, use pio\n",
 				ret);
 	}
 
@@ -1707,16 +1709,17 @@
 	ret = spi_bitbang_start(&spi_imx->bitbang);
 	if (ret) {
 		dev_err(&pdev->dev, "bitbang start failed with %d\n", ret);
-		goto out_runtime_pm_put;
+		goto out_bitbang_start;
 	}
 
-	dev_info(&pdev->dev, "probed\n");
-
 	pm_runtime_mark_last_busy(spi_imx->dev);
 	pm_runtime_put_autosuspend(spi_imx->dev);
 
 	return ret;
 
+out_bitbang_start:
+	if (spi_imx->devtype_data->has_dmamode)
+		spi_imx_sdma_exit(spi_imx);
 out_runtime_pm_put:
 	pm_runtime_dont_use_autosuspend(spi_imx->dev);
 	pm_runtime_put_sync(spi_imx->dev);
diff --git a/drivers/spi/spi-lantiq-ssc.c b/drivers/spi/spi-lantiq-ssc.c
index 3cbecb2..bcb5260 100644
--- a/drivers/spi/spi-lantiq-ssc.c
+++ b/drivers/spi/spi-lantiq-ssc.c
@@ -625,9 +625,8 @@
 	struct lantiq_ssc_spi *spi = data;
 	const struct lantiq_ssc_hwcfg *hwcfg = spi->hwcfg;
 	u32 val = lantiq_ssc_readl(spi, hwcfg->irncr);
-	unsigned long flags;
 
-	spin_lock_irqsave(&spi->lock, flags);
+	spin_lock(&spi->lock);
 	if (hwcfg->irq_ack)
 		lantiq_ssc_writel(spi, val, hwcfg->irncr);
 
@@ -652,12 +651,12 @@
 		}
 	}
 
-	spin_unlock_irqrestore(&spi->lock, flags);
+	spin_unlock(&spi->lock);
 	return IRQ_HANDLED;
 
 completed:
 	queue_work(spi->wq, &spi->work);
-	spin_unlock_irqrestore(&spi->lock, flags);
+	spin_unlock(&spi->lock);
 
 	return IRQ_HANDLED;
 }
@@ -668,12 +667,11 @@
 	const struct lantiq_ssc_hwcfg *hwcfg = spi->hwcfg;
 	u32 stat = lantiq_ssc_readl(spi, LTQ_SPI_STAT);
 	u32 val = lantiq_ssc_readl(spi, hwcfg->irncr);
-	unsigned long flags;
 
 	if (!(stat & LTQ_SPI_STAT_ERRORS))
 		return IRQ_NONE;
 
-	spin_lock_irqsave(&spi->lock, flags);
+	spin_lock(&spi->lock);
 	if (hwcfg->irq_ack)
 		lantiq_ssc_writel(spi, val, hwcfg->irncr);
 
@@ -697,7 +695,7 @@
 	if (spi->master->cur_msg)
 		spi->master->cur_msg->status = -EIO;
 	queue_work(spi->wq, &spi->work);
-	spin_unlock_irqrestore(&spi->lock, flags);
+	spin_unlock(&spi->lock);
 
 	return IRQ_HANDLED;
 }
diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c
index b08d8e9..b97f26a 100644
--- a/drivers/spi/spi-mtk-nor.c
+++ b/drivers/spi/spi-mtk-nor.c
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi-mem.h>
 #include <linux/string.h>
@@ -27,6 +28,7 @@
 #define MTK_NOR_CMD_MASK		GENMASK(5, 0)
 
 #define MTK_NOR_REG_PRG_CNT		0x04
+#define MTK_NOR_PRG_CNT_MAX		56
 #define MTK_NOR_REG_RDATA		0x0c
 
 #define MTK_NOR_REG_RADR0		0x10
@@ -78,6 +80,8 @@
 #define MTK_NOR_REG_DMA_FADR		0x71c
 #define MTK_NOR_REG_DMA_DADR		0x720
 #define MTK_NOR_REG_DMA_END_DADR	0x724
+#define MTK_NOR_REG_DMA_DADR_HB		0x738
+#define MTK_NOR_REG_DMA_END_DADR_HB	0x73c
 
 #define MTK_NOR_PRG_MAX_SIZE		6
 // Reading DMA src/dst addresses have to be 16-byte aligned
@@ -89,18 +93,20 @@
 // Buffered page program can do one 128-byte transfer
 #define MTK_NOR_PP_SIZE			128
 
-#define CLK_TO_US(sp, clkcnt)		((clkcnt) * 1000000 / sp->spi_freq)
+#define CLK_TO_US(sp, clkcnt)		DIV_ROUND_UP(clkcnt, sp->spi_freq / 1000000)
 
 struct mtk_nor {
 	struct spi_controller *ctlr;
 	struct device *dev;
 	void __iomem *base;
 	u8 *buffer;
+	dma_addr_t buffer_dma;
 	struct clk *spi_clk;
 	struct clk *ctlr_clk;
 	unsigned int spi_freq;
 	bool wbuf_en;
 	bool has_irq;
+	bool high_dma;
 	struct completion op_done;
 };
 
@@ -144,6 +150,11 @@
 	}
 }
 
+static bool need_bounce(struct mtk_nor *sp, const struct spi_mem_op *op)
+{
+	return ((uintptr_t)op->data.buf.in & MTK_NOR_DMA_ALIGN_MASK);
+}
+
 static bool mtk_nor_match_read(const struct spi_mem_op *op)
 {
 	int dummy = 0;
@@ -167,9 +178,77 @@
 	return false;
 }
 
+static bool mtk_nor_match_prg(const struct spi_mem_op *op)
+{
+	int tx_len, rx_len, prg_len, prg_left;
+
+	// prg mode is spi-only.
+	if ((op->cmd.buswidth > 1) || (op->addr.buswidth > 1) ||
+	    (op->dummy.buswidth > 1) || (op->data.buswidth > 1))
+		return false;
+
+	tx_len = op->cmd.nbytes + op->addr.nbytes;
+
+	if (op->data.dir == SPI_MEM_DATA_OUT) {
+		// count dummy bytes only if we need to write data after it
+		tx_len += op->dummy.nbytes;
+
+		// leave at least one byte for data
+		if (tx_len > MTK_NOR_REG_PRGDATA_MAX)
+			return false;
+
+		// if there's no addr, meaning adjust_op_size is impossible,
+		// check data length as well.
+		if ((!op->addr.nbytes) &&
+		    (tx_len + op->data.nbytes > MTK_NOR_REG_PRGDATA_MAX + 1))
+			return false;
+	} else if (op->data.dir == SPI_MEM_DATA_IN) {
+		if (tx_len > MTK_NOR_REG_PRGDATA_MAX + 1)
+			return false;
+
+		rx_len = op->data.nbytes;
+		prg_left = MTK_NOR_PRG_CNT_MAX / 8 - tx_len - op->dummy.nbytes;
+		if (prg_left > MTK_NOR_REG_SHIFT_MAX + 1)
+			prg_left = MTK_NOR_REG_SHIFT_MAX + 1;
+		if (rx_len > prg_left) {
+			if (!op->addr.nbytes)
+				return false;
+			rx_len = prg_left;
+		}
+
+		prg_len = tx_len + op->dummy.nbytes + rx_len;
+		if (prg_len > MTK_NOR_PRG_CNT_MAX / 8)
+			return false;
+	} else {
+		prg_len = tx_len + op->dummy.nbytes;
+		if (prg_len > MTK_NOR_PRG_CNT_MAX / 8)
+			return false;
+	}
+	return true;
+}
+
+static void mtk_nor_adj_prg_size(struct spi_mem_op *op)
+{
+	int tx_len, tx_left, prg_left;
+
+	tx_len = op->cmd.nbytes + op->addr.nbytes;
+	if (op->data.dir == SPI_MEM_DATA_OUT) {
+		tx_len += op->dummy.nbytes;
+		tx_left = MTK_NOR_REG_PRGDATA_MAX + 1 - tx_len;
+		if (op->data.nbytes > tx_left)
+			op->data.nbytes = tx_left;
+	} else if (op->data.dir == SPI_MEM_DATA_IN) {
+		prg_left = MTK_NOR_PRG_CNT_MAX / 8 - tx_len - op->dummy.nbytes;
+		if (prg_left > MTK_NOR_REG_SHIFT_MAX + 1)
+			prg_left = MTK_NOR_REG_SHIFT_MAX + 1;
+		if (op->data.nbytes > prg_left)
+			op->data.nbytes = prg_left;
+	}
+}
+
 static int mtk_nor_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op)
 {
-	size_t len;
+	struct mtk_nor *sp = spi_controller_get_devdata(mem->spi->master);
 
 	if (!op->data.nbytes)
 		return 0;
@@ -177,11 +256,14 @@
 	if ((op->addr.nbytes == 3) || (op->addr.nbytes == 4)) {
 		if ((op->data.dir == SPI_MEM_DATA_IN) &&
 		    mtk_nor_match_read(op)) {
+			// limit size to prevent timeout calculation overflow
+			if (op->data.nbytes > 0x400000)
+				op->data.nbytes = 0x400000;
+
 			if ((op->addr.val & MTK_NOR_DMA_ALIGN_MASK) ||
 			    (op->data.nbytes < MTK_NOR_DMA_ALIGN))
 				op->data.nbytes = 1;
-			else if (!((ulong)(op->data.buf.in) &
-				   MTK_NOR_DMA_ALIGN_MASK))
+			else if (!need_bounce(sp, op))
 				op->data.nbytes &= ~MTK_NOR_DMA_ALIGN_MASK;
 			else if (op->data.nbytes > MTK_NOR_BOUNCE_BUF_SIZE)
 				op->data.nbytes = MTK_NOR_BOUNCE_BUF_SIZE;
@@ -195,41 +277,37 @@
 		}
 	}
 
-	len = MTK_NOR_PRG_MAX_SIZE - op->cmd.nbytes - op->addr.nbytes -
-	      op->dummy.nbytes;
-	if (op->data.nbytes > len)
-		op->data.nbytes = len;
-
+	mtk_nor_adj_prg_size(op);
 	return 0;
 }
 
 static bool mtk_nor_supports_op(struct spi_mem *mem,
 				const struct spi_mem_op *op)
 {
-	size_t len;
+	if (!spi_mem_default_supports_op(mem, op))
+		return false;
 
 	if (op->cmd.buswidth != 1)
 		return false;
 
-	/* DTR ops not supported. */
-	if (op->cmd.dtr || op->addr.dtr || op->dummy.dtr || op->data.dtr)
-		return false;
-	if (op->cmd.nbytes != 1)
-		return false;
-
 	if ((op->addr.nbytes == 3) || (op->addr.nbytes == 4)) {
-		if ((op->data.dir == SPI_MEM_DATA_IN) && mtk_nor_match_read(op))
-			return true;
-		else if (op->data.dir == SPI_MEM_DATA_OUT)
-			return (op->addr.buswidth == 1) &&
-			       (op->dummy.buswidth == 0) &&
-			       (op->data.buswidth == 1);
+		switch(op->data.dir) {
+		case SPI_MEM_DATA_IN:
+			if (mtk_nor_match_read(op))
+				return true;
+			break;
+		case SPI_MEM_DATA_OUT:
+			if ((op->addr.buswidth == 1) &&
+			    (op->dummy.nbytes == 0) &&
+			    (op->data.buswidth == 1))
+				return true;
+			break;
+		default:
+			break;
+		}
 	}
-	len = op->cmd.nbytes + op->addr.nbytes + op->dummy.nbytes;
-	if ((len > MTK_NOR_PRG_MAX_SIZE) ||
-	    ((op->data.nbytes) && (len == MTK_NOR_PRG_MAX_SIZE)))
-		return false;
-	return true;
+
+	return mtk_nor_match_prg(op);
 }
 
 static void mtk_nor_setup_bus(struct mtk_nor *sp, const struct spi_mem_op *op)
@@ -258,24 +336,24 @@
 	mtk_nor_rmw(sp, MTK_NOR_REG_BUSCFG, reg, MTK_NOR_BUS_MODE_MASK);
 }
 
-static int mtk_nor_read_dma(struct mtk_nor *sp, u32 from, unsigned int length,
-			    u8 *buffer)
+static int mtk_nor_dma_exec(struct mtk_nor *sp, u32 from, unsigned int length,
+			    dma_addr_t dma_addr)
 {
 	int ret = 0;
 	ulong delay;
 	u32 reg;
-	dma_addr_t dma_addr;
-
-	dma_addr = dma_map_single(sp->dev, buffer, length, DMA_FROM_DEVICE);
-	if (dma_mapping_error(sp->dev, dma_addr)) {
-		dev_err(sp->dev, "failed to map dma buffer.\n");
-		return -EINVAL;
-	}
 
 	writel(from, sp->base + MTK_NOR_REG_DMA_FADR);
 	writel(dma_addr, sp->base + MTK_NOR_REG_DMA_DADR);
 	writel(dma_addr + length, sp->base + MTK_NOR_REG_DMA_END_DADR);
 
+	if (sp->high_dma) {
+		writel(upper_32_bits(dma_addr),
+		       sp->base + MTK_NOR_REG_DMA_DADR_HB);
+		writel(upper_32_bits(dma_addr + length),
+		       sp->base + MTK_NOR_REG_DMA_END_DADR_HB);
+	}
+
 	if (sp->has_irq) {
 		reinit_completion(&sp->op_done);
 		mtk_nor_rmw(sp, MTK_NOR_REG_IRQ_EN, MTK_NOR_IRQ_DMA, 0);
@@ -295,30 +373,49 @@
 					 (delay + 1) * 100);
 	}
 
-	dma_unmap_single(sp->dev, dma_addr, length, DMA_FROM_DEVICE);
 	if (ret < 0)
 		dev_err(sp->dev, "dma read timeout.\n");
 
 	return ret;
 }
 
-static int mtk_nor_read_bounce(struct mtk_nor *sp, u32 from,
-			       unsigned int length, u8 *buffer)
+static int mtk_nor_read_bounce(struct mtk_nor *sp, const struct spi_mem_op *op)
 {
 	unsigned int rdlen;
 	int ret;
 
-	if (length & MTK_NOR_DMA_ALIGN_MASK)
-		rdlen = (length + MTK_NOR_DMA_ALIGN) & ~MTK_NOR_DMA_ALIGN_MASK;
+	if (op->data.nbytes & MTK_NOR_DMA_ALIGN_MASK)
+		rdlen = (op->data.nbytes + MTK_NOR_DMA_ALIGN) & ~MTK_NOR_DMA_ALIGN_MASK;
 	else
-		rdlen = length;
+		rdlen = op->data.nbytes;
 
-	ret = mtk_nor_read_dma(sp, from, rdlen, sp->buffer);
-	if (ret)
-		return ret;
+	ret = mtk_nor_dma_exec(sp, op->addr.val, rdlen, sp->buffer_dma);
 
-	memcpy(buffer, sp->buffer, length);
-	return 0;
+	if (!ret)
+		memcpy(op->data.buf.in, sp->buffer, op->data.nbytes);
+
+	return ret;
+}
+
+static int mtk_nor_read_dma(struct mtk_nor *sp, const struct spi_mem_op *op)
+{
+	int ret;
+	dma_addr_t dma_addr;
+
+	if (need_bounce(sp, op))
+		return mtk_nor_read_bounce(sp, op);
+
+	dma_addr = dma_map_single(sp->dev, op->data.buf.in,
+				  op->data.nbytes, DMA_FROM_DEVICE);
+
+	if (dma_mapping_error(sp->dev, dma_addr))
+		return -EINVAL;
+
+	ret = mtk_nor_dma_exec(sp, op->addr.val, op->data.nbytes, dma_addr);
+
+	dma_unmap_single(sp->dev, dma_addr, op->data.nbytes, DMA_FROM_DEVICE);
+
+	return ret;
 }
 
 static int mtk_nor_read_pio(struct mtk_nor *sp, const struct spi_mem_op *op)
@@ -397,6 +494,83 @@
 	return mtk_nor_cmd_exec(sp, MTK_NOR_CMD_WRITE, 6 * BITS_PER_BYTE);
 }
 
+static int mtk_nor_spi_mem_prg(struct mtk_nor *sp, const struct spi_mem_op *op)
+{
+	int rx_len = 0;
+	int reg_offset = MTK_NOR_REG_PRGDATA_MAX;
+	int tx_len, prg_len;
+	int i, ret;
+	void __iomem *reg;
+	u8 bufbyte;
+
+	tx_len = op->cmd.nbytes + op->addr.nbytes;
+
+	// count dummy bytes only if we need to write data after it
+	if (op->data.dir == SPI_MEM_DATA_OUT)
+		tx_len += op->dummy.nbytes + op->data.nbytes;
+	else if (op->data.dir == SPI_MEM_DATA_IN)
+		rx_len = op->data.nbytes;
+
+	prg_len = op->cmd.nbytes + op->addr.nbytes + op->dummy.nbytes +
+		  op->data.nbytes;
+
+	// an invalid op may reach here if the caller calls exec_op without
+	// adjust_op_size. return -EINVAL instead of -ENOTSUPP so that
+	// spi-mem won't try this op again with generic spi transfers.
+	if ((tx_len > MTK_NOR_REG_PRGDATA_MAX + 1) ||
+	    (rx_len > MTK_NOR_REG_SHIFT_MAX + 1) ||
+	    (prg_len > MTK_NOR_PRG_CNT_MAX / 8))
+		return -EINVAL;
+
+	// fill tx data
+	for (i = op->cmd.nbytes; i > 0; i--, reg_offset--) {
+		reg = sp->base + MTK_NOR_REG_PRGDATA(reg_offset);
+		bufbyte = (op->cmd.opcode >> ((i - 1) * BITS_PER_BYTE)) & 0xff;
+		writeb(bufbyte, reg);
+	}
+
+	for (i = op->addr.nbytes; i > 0; i--, reg_offset--) {
+		reg = sp->base + MTK_NOR_REG_PRGDATA(reg_offset);
+		bufbyte = (op->addr.val >> ((i - 1) * BITS_PER_BYTE)) & 0xff;
+		writeb(bufbyte, reg);
+	}
+
+	if (op->data.dir == SPI_MEM_DATA_OUT) {
+		for (i = 0; i < op->dummy.nbytes; i++, reg_offset--) {
+			reg = sp->base + MTK_NOR_REG_PRGDATA(reg_offset);
+			writeb(0, reg);
+		}
+
+		for (i = 0; i < op->data.nbytes; i++, reg_offset--) {
+			reg = sp->base + MTK_NOR_REG_PRGDATA(reg_offset);
+			writeb(((const u8 *)(op->data.buf.out))[i], reg);
+		}
+	}
+
+	for (; reg_offset >= 0; reg_offset--) {
+		reg = sp->base + MTK_NOR_REG_PRGDATA(reg_offset);
+		writeb(0, reg);
+	}
+
+	// trigger op
+	writel(prg_len * BITS_PER_BYTE, sp->base + MTK_NOR_REG_PRG_CNT);
+	ret = mtk_nor_cmd_exec(sp, MTK_NOR_CMD_PROGRAM,
+			       prg_len * BITS_PER_BYTE);
+	if (ret)
+		return ret;
+
+	// fetch read data
+	reg_offset = 0;
+	if (op->data.dir == SPI_MEM_DATA_IN) {
+		for (i = op->data.nbytes - 1; i >= 0; i--, reg_offset++) {
+			reg = sp->base + MTK_NOR_REG_SHIFT(reg_offset);
+			((u8 *)(op->data.buf.in))[i] = readb(reg);
+		}
+	}
+
+	return 0;
+}
+
 static int mtk_nor_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
 {
 	struct mtk_nor *sp = spi_controller_get_devdata(mem->spi->master);
@@ -404,7 +578,7 @@
 
 	if ((op->data.nbytes == 0) ||
 	    ((op->addr.nbytes != 3) && (op->addr.nbytes != 4)))
-		return -ENOTSUPP;
+		return mtk_nor_spi_mem_prg(sp, op);
 
 	if (op->data.dir == SPI_MEM_DATA_OUT) {
 		mtk_nor_set_addr(sp, op);
@@ -422,19 +596,12 @@
 		if (op->data.nbytes == 1) {
 			mtk_nor_set_addr(sp, op);
 			return mtk_nor_read_pio(sp, op);
-		} else if (((ulong)(op->data.buf.in) &
-			    MTK_NOR_DMA_ALIGN_MASK)) {
-			return mtk_nor_read_bounce(sp, op->addr.val,
-						   op->data.nbytes,
-						   op->data.buf.in);
 		} else {
-			return mtk_nor_read_dma(sp, op->addr.val,
-						op->data.nbytes,
-						op->data.buf.in);
+			return mtk_nor_read_dma(sp, op);
 		}
 	}
 
-	return -ENOTSUPP;
+	return mtk_nor_spi_mem_prg(sp, op);
 }
 
 static int mtk_nor_setup(struct spi_device *spi)
@@ -524,22 +691,15 @@
 	return 0;
 }
 
-static int mtk_nor_init(struct mtk_nor *sp)
+static void mtk_nor_init(struct mtk_nor *sp)
 {
-	int ret;
-
-	ret = mtk_nor_enable_clk(sp);
-	if (ret)
-		return ret;
-
-	sp->spi_freq = clk_get_rate(sp->spi_clk);
+	writel(0, sp->base + MTK_NOR_REG_IRQ_EN);
+	writel(MTK_NOR_IRQ_MASK, sp->base + MTK_NOR_REG_IRQ_STAT);
 
 	writel(MTK_NOR_ENABLE_SF_CMD, sp->base + MTK_NOR_REG_WP);
 	mtk_nor_rmw(sp, MTK_NOR_REG_CFG2, MTK_NOR_WR_CUSTOM_OP_EN, 0);
 	mtk_nor_rmw(sp, MTK_NOR_REG_CFG3,
 		    MTK_NOR_DISABLE_WREN | MTK_NOR_DISABLE_SR_POLL, 0);
-
-	return ret;
 }
 
 static irqreturn_t mtk_nor_irq_handler(int irq, void *data)
@@ -575,7 +735,8 @@
 };
 
 static const struct of_device_id mtk_nor_match[] = {
-	{ .compatible = "mediatek,mt8173-nor" },
+	{ .compatible = "mediatek,mt8192-nor", .data = (void *)36 },
+	{ .compatible = "mediatek,mt8173-nor", .data = (void *)32 },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, mtk_nor_match);
@@ -585,9 +746,9 @@
 	struct spi_controller *ctlr;
 	struct mtk_nor *sp;
 	void __iomem *base;
-	u8 *buffer;
 	struct clk *spi_clk, *ctlr_clk;
 	int ret, irq;
+	unsigned long dma_bits;
 
 	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base))
@@ -601,15 +762,11 @@
 	if (IS_ERR(ctlr_clk))
 		return PTR_ERR(ctlr_clk);
 
-	buffer = devm_kmalloc(&pdev->dev,
-			      MTK_NOR_BOUNCE_BUF_SIZE + MTK_NOR_DMA_ALIGN,
-			      GFP_KERNEL);
-	if (!buffer)
-		return -ENOMEM;
-
-	if ((ulong)buffer & MTK_NOR_DMA_ALIGN_MASK)
-		buffer = (u8 *)(((ulong)buffer + MTK_NOR_DMA_ALIGN) &
-				~MTK_NOR_DMA_ALIGN_MASK);
+	dma_bits = (unsigned long)of_device_get_match_data(&pdev->dev);
+	if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(dma_bits))) {
+		dev_err(&pdev->dev, "failed to set dma mask(%lu)\n", dma_bits);
+		return -EINVAL;
+	}
 
 	ctlr = spi_alloc_master(&pdev->dev, sizeof(*sp));
 	if (!ctlr) {
@@ -625,25 +782,43 @@
 	ctlr->num_chipselect = 1;
 	ctlr->setup = mtk_nor_setup;
 	ctlr->transfer_one_message = mtk_nor_transfer_one_message;
+	ctlr->auto_runtime_pm = true;
 
 	dev_set_drvdata(&pdev->dev, ctlr);
 
 	sp = spi_controller_get_devdata(ctlr);
 	sp->base = base;
-	sp->buffer = buffer;
 	sp->has_irq = false;
 	sp->wbuf_en = false;
 	sp->ctlr = ctlr;
 	sp->dev = &pdev->dev;
 	sp->spi_clk = spi_clk;
 	sp->ctlr_clk = ctlr_clk;
+	sp->high_dma = (dma_bits > 32);
+	sp->buffer = dmam_alloc_coherent(&pdev->dev,
+				MTK_NOR_BOUNCE_BUF_SIZE + MTK_NOR_DMA_ALIGN,
+				&sp->buffer_dma, GFP_KERNEL);
+	if (!sp->buffer)
+		return -ENOMEM;
+
+	if ((uintptr_t)sp->buffer & MTK_NOR_DMA_ALIGN_MASK) {
+		dev_err(sp->dev, "misaligned allocation of internal buffer.\n");
+		return -ENOMEM;
+	}
+
+	ret = mtk_nor_enable_clk(sp);
+	if (ret < 0)
+		return ret;
+
+	sp->spi_freq = clk_get_rate(sp->spi_clk);
+
+	mtk_nor_init(sp);
 
 	irq = platform_get_irq_optional(pdev, 0);
+
 	if (irq < 0) {
 		dev_warn(sp->dev, "IRQ not available.");
 	} else {
-		writel(MTK_NOR_IRQ_MASK, base + MTK_NOR_REG_IRQ_STAT);
-		writel(0, base + MTK_NOR_REG_IRQ_EN);
 		ret = devm_request_irq(sp->dev, irq, mtk_nor_irq_handler, 0,
 				       pdev->name, sp);
 		if (ret < 0) {
@@ -654,34 +829,86 @@
 		}
 	}
 
-	ret = mtk_nor_init(sp);
-	if (ret < 0) {
-		kfree(ctlr);
-		return ret;
-	}
+	pm_runtime_set_autosuspend_delay(&pdev->dev, -1);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_get_noresume(&pdev->dev);
+
+	ret = devm_spi_register_controller(&pdev->dev, ctlr);
+	if (ret < 0)
+		goto err_probe;
+
+	pm_runtime_mark_last_busy(&pdev->dev);
+	pm_runtime_put_autosuspend(&pdev->dev);
 
 	dev_info(&pdev->dev, "spi frequency: %d Hz\n", sp->spi_freq);
 
-	return devm_spi_register_controller(&pdev->dev, ctlr);
+	return 0;
+
+err_probe:
+	pm_runtime_disable(&pdev->dev);
+	pm_runtime_set_suspended(&pdev->dev);
+	pm_runtime_dont_use_autosuspend(&pdev->dev);
+
+	mtk_nor_disable_clk(sp);
+
+	return ret;
 }
 
 static int mtk_nor_remove(struct platform_device *pdev)
 {
-	struct spi_controller *ctlr;
-	struct mtk_nor *sp;
+	struct spi_controller *ctlr = dev_get_drvdata(&pdev->dev);
+	struct mtk_nor *sp = spi_controller_get_devdata(ctlr);
 
-	ctlr = dev_get_drvdata(&pdev->dev);
-	sp = spi_controller_get_devdata(ctlr);
+	pm_runtime_disable(&pdev->dev);
+	pm_runtime_set_suspended(&pdev->dev);
+	pm_runtime_dont_use_autosuspend(&pdev->dev);
 
 	mtk_nor_disable_clk(sp);
 
 	return 0;
 }
 
+static int __maybe_unused mtk_nor_runtime_suspend(struct device *dev)
+{
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct mtk_nor *sp = spi_controller_get_devdata(ctlr);
+
+	mtk_nor_disable_clk(sp);
+
+	return 0;
+}
+
+static int __maybe_unused mtk_nor_runtime_resume(struct device *dev)
+{
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct mtk_nor *sp = spi_controller_get_devdata(ctlr);
+
+	return mtk_nor_enable_clk(sp);
+}
+
+static int __maybe_unused mtk_nor_suspend(struct device *dev)
+{
+	return pm_runtime_force_suspend(dev);
+}
+
+static int __maybe_unused mtk_nor_resume(struct device *dev)
+{
+	return pm_runtime_force_resume(dev);
+}
+
+static const struct dev_pm_ops mtk_nor_pm_ops = {
+	SET_RUNTIME_PM_OPS(mtk_nor_runtime_suspend,
+			   mtk_nor_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(mtk_nor_suspend, mtk_nor_resume)
+};
+
 static struct platform_driver mtk_nor_driver = {
 	.driver = {
 		.name = DRIVER_NAME,
 		.of_match_table = mtk_nor_match,
+		.pm = &mtk_nor_pm_ops,
 	},
 	.probe = mtk_nor_probe,
 	.remove = mtk_nor_remove,
diff --git a/drivers/spi/spi-mux.c b/drivers/spi/spi-mux.c
index cc9ef37..37dfc6e 100644
--- a/drivers/spi/spi-mux.c
+++ b/drivers/spi/spi-mux.c
@@ -139,9 +139,8 @@
 
 	priv->mux = devm_mux_control_get(&spi->dev, NULL);
 	if (IS_ERR(priv->mux)) {
-		ret = PTR_ERR(priv->mux);
-		if (ret != -EPROBE_DEFER)
-			dev_err(&spi->dev, "failed to get control-mux\n");
+		ret = dev_err_probe(&spi->dev, PTR_ERR(priv->mux),
+				    "failed to get control-mux\n");
 		goto err_put_ctlr;
 	}
 
diff --git a/drivers/spi/spi-npcm-fiu.c b/drivers/spi/spi-npcm-fiu.c
index 9468e71..341f7cf 100644
--- a/drivers/spi/spi-npcm-fiu.c
+++ b/drivers/spi/spi-npcm-fiu.c
@@ -677,7 +677,6 @@
 	struct npcm_fiu_spi *fiu;
 	void __iomem *regbase;
 	struct resource *res;
-	int ret;
 	int id;
 
 	ctrl = spi_alloc_master(dev, sizeof(*fiu));
@@ -736,11 +735,7 @@
 	ctrl->num_chipselect = fiu->info->max_cs;
 	ctrl->dev.of_node = dev->of_node;
 
-	ret = devm_spi_register_master(dev, ctrl);
-	if (ret)
-		return ret;
-
-	return 0;
+	return devm_spi_register_master(dev, ctrl);
 }
 
 static int npcm_fiu_remove(struct platform_device *pdev)
diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c
index 1ccda82..0d41406 100644
--- a/drivers/spi/spi-nxp-fspi.c
+++ b/drivers/spi/spi-nxp-fspi.c
@@ -3,7 +3,8 @@
 /*
  * NXP FlexSPI(FSPI) controller driver.
  *
- * Copyright 2019 NXP.
+ * Copyright 2019-2020 NXP
+ * Copyright 2020 Puresoftware Ltd.
  *
  * FlexSPI is a flexsible SPI host controller which supports two SPI
  * channels and up to 4 external devices. Each channel supports
@@ -30,6 +31,7 @@
  *     Frieder Schrempf <frieder.schrempf@kontron.de>
  */
 
+#include <linux/acpi.h>
 #include <linux/bitops.h>
 #include <linux/clk.h>
 #include <linux/completion.h>
@@ -563,6 +565,9 @@
 {
 	int ret;
 
+	if (is_acpi_node(f->dev->fwnode))
+		return 0;
+
 	ret = clk_prepare_enable(f->clk_en);
 	if (ret)
 		return ret;
@@ -576,10 +581,15 @@
 	return 0;
 }
 
-static void nxp_fspi_clk_disable_unprep(struct nxp_fspi *f)
+static int nxp_fspi_clk_disable_unprep(struct nxp_fspi *f)
 {
+	if (is_acpi_node(f->dev->fwnode))
+		return 0;
+
 	clk_disable_unprepare(f->clk);
 	clk_disable_unprepare(f->clk_en);
+
+	return 0;
 }
 
 /*
@@ -1001,7 +1011,7 @@
 
 	f = spi_controller_get_devdata(ctlr);
 	f->dev = dev;
-	f->devtype_data = of_device_get_match_data(dev);
+	f->devtype_data = device_get_match_data(dev);
 	if (!f->devtype_data) {
 		ret = -ENODEV;
 		goto err_put_ctrl;
@@ -1010,7 +1020,12 @@
 	platform_set_drvdata(pdev, f);
 
 	/* find the resources - configuration register address space */
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "fspi_base");
+	if (is_acpi_node(f->dev->fwnode))
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	else
+		res = platform_get_resource_byname(pdev,
+				IORESOURCE_MEM, "fspi_base");
+
 	f->iobase = devm_ioremap_resource(dev, res);
 	if (IS_ERR(f->iobase)) {
 		ret = PTR_ERR(f->iobase);
@@ -1018,7 +1033,12 @@
 	}
 
 	/* find the resources - controller memory mapped space */
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "fspi_mmap");
+	if (is_acpi_node(f->dev->fwnode))
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	else
+		res = platform_get_resource_byname(pdev,
+				IORESOURCE_MEM, "fspi_mmap");
+
 	if (!res) {
 		ret = -ENODEV;
 		goto err_put_ctrl;
@@ -1029,22 +1049,24 @@
 	f->memmap_phy_size = resource_size(res);
 
 	/* find the clocks */
-	f->clk_en = devm_clk_get(dev, "fspi_en");
-	if (IS_ERR(f->clk_en)) {
-		ret = PTR_ERR(f->clk_en);
-		goto err_put_ctrl;
-	}
+	if (dev_of_node(&pdev->dev)) {
+		f->clk_en = devm_clk_get(dev, "fspi_en");
+		if (IS_ERR(f->clk_en)) {
+			ret = PTR_ERR(f->clk_en);
+			goto err_put_ctrl;
+		}
 
-	f->clk = devm_clk_get(dev, "fspi");
-	if (IS_ERR(f->clk)) {
-		ret = PTR_ERR(f->clk);
-		goto err_put_ctrl;
-	}
+		f->clk = devm_clk_get(dev, "fspi");
+		if (IS_ERR(f->clk)) {
+			ret = PTR_ERR(f->clk);
+			goto err_put_ctrl;
+		}
 
-	ret = nxp_fspi_clk_prep_enable(f);
-	if (ret) {
-		dev_err(dev, "can not enable the clock\n");
-		goto err_put_ctrl;
+		ret = nxp_fspi_clk_prep_enable(f);
+		if (ret) {
+			dev_err(dev, "can not enable the clock\n");
+			goto err_put_ctrl;
+		}
 	}
 
 	/* find the irq */
@@ -1127,6 +1149,14 @@
 };
 MODULE_DEVICE_TABLE(of, nxp_fspi_dt_ids);
 
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id nxp_fspi_acpi_ids[] = {
+	{ "NXP0009", .driver_data = (kernel_ulong_t)&lx2160a_data, },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, nxp_fspi_acpi_ids);
+#endif
+
 static const struct dev_pm_ops nxp_fspi_pm_ops = {
 	.suspend	= nxp_fspi_suspend,
 	.resume		= nxp_fspi_resume,
@@ -1136,6 +1166,7 @@
 	.driver = {
 		.name	= "nxp-fspi",
 		.of_match_table = nxp_fspi_dt_ids,
+		.acpi_match_table = ACPI_PTR(nxp_fspi_acpi_ids),
 		.pm =   &nxp_fspi_pm_ops,
 	},
 	.probe          = nxp_fspi_probe,
diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index 1c9478e..d4c9510 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -24,7 +24,6 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/gcd.h>
-#include <linux/iopoll.h>
 
 #include <linux/spi/spi.h>
 
@@ -348,9 +347,19 @@
 
 static int mcspi_wait_for_reg_bit(void __iomem *reg, unsigned long bit)
 {
-	u32 val;
+	unsigned long timeout;
 
-	return readl_poll_timeout(reg, val, val & bit, 1, MSEC_PER_SEC);
+	timeout = jiffies + msecs_to_jiffies(1000);
+	while (!(readl_relaxed(reg) & bit)) {
+		if (time_after(jiffies, timeout)) {
+			if (!(readl_relaxed(reg) & bit))
+				return -ETIMEDOUT;
+			else
+				return 0;
+		}
+		cpu_relax();
+	}
+	return 0;
 }
 
 static int mcspi_wait_for_completion(struct  omap2_mcspi *mcspi,
diff --git a/drivers/spi/spi-qcom-qspi.c b/drivers/spi/spi-qcom-qspi.c
index b8857a9..5eed88a 100644
--- a/drivers/spi/spi-qcom-qspi.c
+++ b/drivers/spi/spi-qcom-qspi.c
@@ -143,7 +143,6 @@
 	struct qspi_xfer xfer;
 	struct icc_path *icc_path_cpu_to_qspi;
 	struct opp_table *opp_table;
-	bool has_opp_table;
 	unsigned long last_speed;
 	/* Lock to protect data accessed by IRQs */
 	spinlock_t lock;
@@ -421,9 +420,8 @@
 	u32 int_status;
 	struct qcom_qspi *ctrl = dev_id;
 	irqreturn_t ret = IRQ_NONE;
-	unsigned long flags;
 
-	spin_lock_irqsave(&ctrl->lock, flags);
+	spin_lock(&ctrl->lock);
 
 	int_status = readl(ctrl->base + MSTR_INT_STATUS);
 	writel(int_status, ctrl->base + MSTR_INT_STATUS);
@@ -451,7 +449,7 @@
 		spi_finalize_current_transfer(dev_get_drvdata(ctrl->dev));
 	}
 
-	spin_unlock_irqrestore(&ctrl->lock, flags);
+	spin_unlock(&ctrl->lock);
 	return ret;
 }
 
@@ -495,9 +493,8 @@
 
 	ctrl->icc_path_cpu_to_qspi = devm_of_icc_get(dev, "qspi-config");
 	if (IS_ERR(ctrl->icc_path_cpu_to_qspi)) {
-		ret = PTR_ERR(ctrl->icc_path_cpu_to_qspi);
-		if (ret != -EPROBE_DEFER)
-			dev_err(dev, "Failed to get cpu path: %d\n", ret);
+		ret = dev_err_probe(dev, PTR_ERR(ctrl->icc_path_cpu_to_qspi),
+				    "Failed to get cpu path\n");
 		goto exit_probe_master_put;
 	}
 	/* Set BW vote for register access */
@@ -546,11 +543,9 @@
 	}
 	/* OPP table is optional */
 	ret = dev_pm_opp_of_add_table(&pdev->dev);
-	if (!ret) {
-		ctrl->has_opp_table = true;
-	} else if (ret != -ENODEV) {
+	if (ret && ret != -ENODEV) {
 		dev_err(&pdev->dev, "invalid OPP table in device tree\n");
-		goto exit_probe_master_put;
+		goto exit_probe_put_clkname;
 	}
 
 	pm_runtime_use_autosuspend(dev);
@@ -562,8 +557,9 @@
 		return 0;
 
 	pm_runtime_disable(dev);
-	if (ctrl->has_opp_table)
-		dev_pm_opp_of_remove_table(&pdev->dev);
+	dev_pm_opp_of_remove_table(&pdev->dev);
+
+exit_probe_put_clkname:
 	dev_pm_opp_put_clkname(ctrl->opp_table);
 
 exit_probe_master_put:
@@ -581,8 +577,7 @@
 	spi_unregister_master(master);
 
 	pm_runtime_disable(&pdev->dev);
-	if (ctrl->has_opp_table)
-		dev_pm_opp_of_remove_table(&pdev->dev);
+	dev_pm_opp_of_remove_table(&pdev->dev);
 	dev_pm_opp_put_clkname(ctrl->opp_table);
 
 	return 0;
diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c
index a364b99..8dcb2e7 100644
--- a/drivers/spi/spi-qup.c
+++ b/drivers/spi/spi-qup.c
@@ -848,7 +848,7 @@
 {
 	struct spi_qup *controller = spi_master_get_devdata(master);
 	unsigned long timeout, flags;
-	int ret = -EIO;
+	int ret;
 
 	ret = spi_qup_io_prep(spi, xfer);
 	if (ret)
diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
index cbc2387..e39fd38 100644
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -161,6 +161,7 @@
 #define SPCMD_SPRW		0x0010	/* SPI Read/Write Access (Dual/Quad) */
 #define SPCMD_SSLA(i)		((i) << 4)	/* SSL Assert Signal Setting */
 #define SPCMD_BRDV_MASK		0x000c	/* Bit Rate Division Setting */
+#define SPCMD_BRDV(brdv)	((brdv) << 2)
 #define SPCMD_CPOL		0x0002	/* Clock Polarity Setting */
 #define SPCMD_CPHA		0x0001	/* Clock Phase Setting */
 
@@ -242,24 +243,40 @@
 	int (*transfer_one)(struct spi_controller *ctlr,
 			    struct spi_device *spi, struct spi_transfer *xfer);
 	u16 extra_mode_bits;
+	u16 min_div;
+	u16 max_div;
 	u16 flags;
 	u16 fifo_size;
 	u8 num_hw_ss;
 };
 
+static void rspi_set_rate(struct rspi_data *rspi)
+{
+	unsigned long clksrc;
+	int brdv = 0, spbr;
+
+	clksrc = clk_get_rate(rspi->clk);
+	spbr = DIV_ROUND_UP(clksrc, 2 * rspi->speed_hz) - 1;
+	while (spbr > 255 && brdv < 3) {
+		brdv++;
+		spbr = DIV_ROUND_UP(spbr + 1, 2) - 1;
+	}
+
+	rspi_write8(rspi, clamp(spbr, 0, 255), RSPI_SPBR);
+	rspi->spcmd |= SPCMD_BRDV(brdv);
+	rspi->speed_hz = DIV_ROUND_UP(clksrc, (2U << brdv) * (spbr + 1));
+}
+
 /*
  * functions for RSPI on legacy SH
  */
 static int rspi_set_config_register(struct rspi_data *rspi, int access_size)
 {
-	int spbr;
-
 	/* Sets output mode, MOSI signal, and (optionally) loopback */
 	rspi_write8(rspi, rspi->sppcr, RSPI_SPPCR);
 
 	/* Sets transfer bit rate */
-	spbr = DIV_ROUND_UP(clk_get_rate(rspi->clk), 2 * rspi->speed_hz) - 1;
-	rspi_write8(rspi, clamp(spbr, 0, 255), RSPI_SPBR);
+	rspi_set_rate(rspi);
 
 	/* Disable dummy transmission, set 16-bit word access, 1 frame */
 	rspi_write8(rspi, 0, RSPI_SPDCR);
@@ -289,25 +306,11 @@
  */
 static int rspi_rz_set_config_register(struct rspi_data *rspi, int access_size)
 {
-	int spbr;
-	int div = 0;
-	unsigned long clksrc;
-
 	/* Sets output mode, MOSI signal, and (optionally) loopback */
 	rspi_write8(rspi, rspi->sppcr, RSPI_SPPCR);
 
-	clksrc = clk_get_rate(rspi->clk);
-	while (div < 3) {
-		if (rspi->speed_hz >= clksrc/4) /* 4=(CLK/2)/2 */
-			break;
-		div++;
-		clksrc /= 2;
-	}
-
 	/* Sets transfer bit rate */
-	spbr = DIV_ROUND_UP(clksrc, 2 * rspi->speed_hz) - 1;
-	rspi_write8(rspi, clamp(spbr, 0, 255), RSPI_SPBR);
-	rspi->spcmd |= div << 2;
+	rspi_set_rate(rspi);
 
 	/* Disable dummy transmission, set byte access */
 	rspi_write8(rspi, SPDCR_SPLBYTE, RSPI_SPDCR);
@@ -334,14 +337,28 @@
  */
 static int qspi_set_config_register(struct rspi_data *rspi, int access_size)
 {
-	int spbr;
+	unsigned long clksrc;
+	int brdv = 0, spbr;
 
 	/* Sets output mode, MOSI signal, and (optionally) loopback */
 	rspi_write8(rspi, rspi->sppcr, RSPI_SPPCR);
 
 	/* Sets transfer bit rate */
-	spbr = DIV_ROUND_UP(clk_get_rate(rspi->clk), 2 * rspi->speed_hz);
-	rspi_write8(rspi, clamp(spbr, 0, 255), RSPI_SPBR);
+	clksrc = clk_get_rate(rspi->clk);
+	if (rspi->speed_hz >= clksrc) {
+		spbr = 0;
+		rspi->speed_hz = clksrc;
+	} else {
+		spbr = DIV_ROUND_UP(clksrc, 2 * rspi->speed_hz);
+		while (spbr > 255 && brdv < 3) {
+			brdv++;
+			spbr = DIV_ROUND_UP(spbr, 2);
+		}
+		spbr = clamp(spbr, 0, 255);
+		rspi->speed_hz = DIV_ROUND_UP(clksrc, (2U << brdv) * spbr);
+	}
+	rspi_write8(rspi, spbr, RSPI_SPBR);
+	rspi->spcmd |= SPCMD_BRDV(brdv);
 
 	/* Disable dummy transmission, set byte access */
 	rspi_write8(rspi, 0, RSPI_SPDCR);
@@ -686,6 +703,8 @@
 {
 	int ret;
 
+	xfer->effective_speed_hz = rspi->speed_hz;
+
 	ret = rspi_dma_check_then_transfer(rspi, xfer);
 	if (ret != -EAGAIN)
 		return ret;
@@ -841,6 +860,7 @@
 {
 	struct rspi_data *rspi = spi_controller_get_devdata(ctlr);
 
+	xfer->effective_speed_hz = rspi->speed_hz;
 	if (spi->mode & SPI_LOOP) {
 		return qspi_transfer_out_in(rspi, xfer);
 	} else if (xfer->tx_nbits > SPI_NBITS_SINGLE) {
@@ -1163,6 +1183,8 @@
 static const struct spi_ops rspi_ops = {
 	.set_config_register =	rspi_set_config_register,
 	.transfer_one =		rspi_transfer_one,
+	.min_div =		2,
+	.max_div =		4096,
 	.flags =		SPI_CONTROLLER_MUST_TX,
 	.fifo_size =		8,
 	.num_hw_ss =		2,
@@ -1171,6 +1193,8 @@
 static const struct spi_ops rspi_rz_ops = {
 	.set_config_register =	rspi_rz_set_config_register,
 	.transfer_one =		rspi_rz_transfer_one,
+	.min_div =		2,
+	.max_div =		4096,
 	.flags =		SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX,
 	.fifo_size =		8,	/* 8 for TX, 32 for RX */
 	.num_hw_ss =		1,
@@ -1181,6 +1205,8 @@
 	.transfer_one =		qspi_transfer_one,
 	.extra_mode_bits =	SPI_TX_DUAL | SPI_TX_QUAD |
 				SPI_RX_DUAL | SPI_RX_QUAD,
+	.min_div =		1,
+	.max_div =		4080,
 	.flags =		SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX,
 	.fifo_size =		32,
 	.num_hw_ss =		1,
@@ -1242,6 +1268,7 @@
 	int ret;
 	const struct rspi_plat_data *rspi_pd;
 	const struct spi_ops *ops;
+	unsigned long clksrc;
 
 	ctlr = spi_alloc_master(&pdev->dev, sizeof(struct rspi_data));
 	if (ctlr == NULL)
@@ -1261,13 +1288,6 @@
 			ctlr->num_chipselect = 2; /* default */
 	}
 
-	/* ops parameter check */
-	if (!ops->set_config_register) {
-		dev_err(&pdev->dev, "there is no set_config_register\n");
-		ret = -ENODEV;
-		goto error1;
-	}
-
 	rspi = spi_controller_get_devdata(ctlr);
 	platform_set_drvdata(pdev, rspi);
 	rspi->ops = ops;
@@ -1301,6 +1321,9 @@
 	ctlr->unprepare_message = rspi_unprepare_message;
 	ctlr->mode_bits = SPI_CPHA | SPI_CPOL | SPI_CS_HIGH | SPI_LSB_FIRST |
 			  SPI_LOOP | ops->extra_mode_bits;
+	clksrc = clk_get_rate(rspi->clk);
+	ctlr->min_speed_hz = DIV_ROUND_UP(clksrc, ops->max_div);
+	ctlr->max_speed_hz = DIV_ROUND_UP(clksrc, ops->min_div);
 	ctlr->flags = ops->flags;
 	ctlr->dev.of_node = pdev->dev.of_node;
 	ctlr->use_gpio_descriptors = true;
diff --git a/drivers/spi/spi-s3c24xx.c b/drivers/spi/spi-s3c24xx.c
index 2cb3b61..7742170 100644
--- a/drivers/spi/spi-s3c24xx.c
+++ b/drivers/spi/spi-s3c24xx.c
@@ -28,7 +28,7 @@
 #include "spi-s3c24xx-fiq.h"
 
 /**
- * s3c24xx_spi_devstate - per device data
+ * struct s3c24xx_spi_devstate - per device data
  * @hz: Last frequency calculated for @sppre field.
  * @mode: Last mode setting for the @spcon field.
  * @spcon: Value to write to the SPCON register.
diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index 924b244..dfa7c91 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -29,7 +29,7 @@
 #define S3C64XX_SPI_CH_CFG		0x00
 #define S3C64XX_SPI_CLK_CFG		0x04
 #define S3C64XX_SPI_MODE_CFG		0x08
-#define S3C64XX_SPI_SLAVE_SEL		0x0C
+#define S3C64XX_SPI_CS_REG		0x0C
 #define S3C64XX_SPI_INT_EN		0x10
 #define S3C64XX_SPI_STATUS		0x14
 #define S3C64XX_SPI_TX_DATA		0x18
@@ -64,9 +64,9 @@
 #define S3C64XX_SPI_MODE_TXDMA_ON		(1<<1)
 #define S3C64XX_SPI_MODE_4BURST			(1<<0)
 
-#define S3C64XX_SPI_SLAVE_AUTO			(1<<1)
-#define S3C64XX_SPI_SLAVE_SIG_INACT		(1<<0)
-#define S3C64XX_SPI_SLAVE_NSC_CNT_2		(2<<4)
+#define S3C64XX_SPI_CS_NSC_CNT_2		(2<<4)
+#define S3C64XX_SPI_CS_AUTO			(1<<1)
+#define S3C64XX_SPI_CS_SIG_INACT		(1<<0)
 
 #define S3C64XX_SPI_INT_TRAILING_EN		(1<<6)
 #define S3C64XX_SPI_INT_RX_OVERRUN_EN		(1<<5)
@@ -122,6 +122,7 @@
 
 struct s3c64xx_spi_dma_data {
 	struct dma_chan *ch;
+	dma_cookie_t cookie;
 	enum dma_transfer_direction direction;
 };
 
@@ -161,11 +162,8 @@
  * @cntrlr_info: Platform specific data for the controller this driver manages.
  * @lock: Controller specific lock.
  * @state: Set of FLAGS to indicate status.
- * @rx_dmach: Controller's DMA channel for Rx.
- * @tx_dmach: Controller's DMA channel for Tx.
  * @sfr_start: BUS address of SPI controller regs.
  * @regs: Pointer to ioremap'ed controller registers.
- * @irq: interrupt
  * @xfer_completion: To indicate completion of xfer task.
  * @cur_mode: Stores the active configuration of the controller.
  * @cur_bpw: Stores the active bits per word settings.
@@ -182,7 +180,7 @@
 	struct clk                      *ioclk;
 	struct platform_device          *pdev;
 	struct spi_master               *master;
-	struct s3c64xx_spi_info  *cntrlr_info;
+	struct s3c64xx_spi_info         *cntrlr_info;
 	spinlock_t                      lock;
 	unsigned long                   sfr_start;
 	struct completion               xfer_completion;
@@ -271,12 +269,13 @@
 	spin_unlock_irqrestore(&sdd->lock, flags);
 }
 
-static void prepare_dma(struct s3c64xx_spi_dma_data *dma,
+static int prepare_dma(struct s3c64xx_spi_dma_data *dma,
 			struct sg_table *sgt)
 {
 	struct s3c64xx_spi_driver_data *sdd;
 	struct dma_slave_config config;
 	struct dma_async_tx_descriptor *desc;
+	int ret;
 
 	memset(&config, 0, sizeof(config));
 
@@ -300,12 +299,24 @@
 
 	desc = dmaengine_prep_slave_sg(dma->ch, sgt->sgl, sgt->nents,
 				       dma->direction, DMA_PREP_INTERRUPT);
+	if (!desc) {
+		dev_err(&sdd->pdev->dev, "unable to prepare %s scatterlist",
+			dma->direction == DMA_DEV_TO_MEM ? "rx" : "tx");
+		return -ENOMEM;
+	}
 
 	desc->callback = s3c64xx_spi_dmacb;
 	desc->callback_param = dma;
 
-	dmaengine_submit(desc);
+	dma->cookie = dmaengine_submit(desc);
+	ret = dma_submit_error(dma->cookie);
+	if (ret) {
+		dev_err(&sdd->pdev->dev, "DMA submission failed");
+		return -EIO;
+	}
+
 	dma_async_issue_pending(dma->ch);
+	return 0;
 }
 
 static void s3c64xx_spi_set_cs(struct spi_device *spi, bool enable)
@@ -318,18 +329,18 @@
 
 	if (enable) {
 		if (!(sdd->port_conf->quirks & S3C64XX_SPI_QUIRK_CS_AUTO)) {
-			writel(0, sdd->regs + S3C64XX_SPI_SLAVE_SEL);
+			writel(0, sdd->regs + S3C64XX_SPI_CS_REG);
 		} else {
-			u32 ssel = readl(sdd->regs + S3C64XX_SPI_SLAVE_SEL);
+			u32 ssel = readl(sdd->regs + S3C64XX_SPI_CS_REG);
 
-			ssel |= (S3C64XX_SPI_SLAVE_AUTO |
-						S3C64XX_SPI_SLAVE_NSC_CNT_2);
-			writel(ssel, sdd->regs + S3C64XX_SPI_SLAVE_SEL);
+			ssel |= (S3C64XX_SPI_CS_AUTO |
+						S3C64XX_SPI_CS_NSC_CNT_2);
+			writel(ssel, sdd->regs + S3C64XX_SPI_CS_REG);
 		}
 	} else {
 		if (!(sdd->port_conf->quirks & S3C64XX_SPI_QUIRK_CS_AUTO))
-			writel(S3C64XX_SPI_SLAVE_SIG_INACT,
-			       sdd->regs + S3C64XX_SPI_SLAVE_SEL);
+			writel(S3C64XX_SPI_CS_SIG_INACT,
+			       sdd->regs + S3C64XX_SPI_CS_REG);
 	}
 }
 
@@ -355,11 +366,12 @@
 	return xfer->len > (FIFO_LVL_MASK(sdd) >> 1) + 1;
 }
 
-static void s3c64xx_enable_datapath(struct s3c64xx_spi_driver_data *sdd,
+static int s3c64xx_enable_datapath(struct s3c64xx_spi_driver_data *sdd,
 				    struct spi_transfer *xfer, int dma_mode)
 {
 	void __iomem *regs = sdd->regs;
 	u32 modecfg, chcfg;
+	int ret = 0;
 
 	modecfg = readl(regs + S3C64XX_SPI_MODE_CFG);
 	modecfg &= ~(S3C64XX_SPI_MODE_TXDMA_ON | S3C64XX_SPI_MODE_RXDMA_ON);
@@ -385,7 +397,7 @@
 		chcfg |= S3C64XX_SPI_CH_TXCH_ON;
 		if (dma_mode) {
 			modecfg |= S3C64XX_SPI_MODE_TXDMA_ON;
-			prepare_dma(&sdd->tx_dma, &xfer->tx_sg);
+			ret = prepare_dma(&sdd->tx_dma, &xfer->tx_sg);
 		} else {
 			switch (sdd->cur_bpw) {
 			case 32:
@@ -417,12 +429,17 @@
 			writel(((xfer->len * 8 / sdd->cur_bpw) & 0xffff)
 					| S3C64XX_SPI_PACKET_CNT_EN,
 					regs + S3C64XX_SPI_PACKET_CNT);
-			prepare_dma(&sdd->rx_dma, &xfer->rx_sg);
+			ret = prepare_dma(&sdd->rx_dma, &xfer->rx_sg);
 		}
 	}
 
+	if (ret)
+		return ret;
+
 	writel(modecfg, regs + S3C64XX_SPI_MODE_CFG);
 	writel(chcfg, regs + S3C64XX_SPI_CH_CFG);
+
+	return 0;
 }
 
 static u32 s3c64xx_spi_wait_for_timeout(struct s3c64xx_spi_driver_data *sdd,
@@ -456,7 +473,8 @@
 
 	/* millisecs to xfer 'len' bytes @ 'cur_speed' */
 	ms = xfer->len * 8 * 1000 / sdd->cur_speed;
-	ms += 10; /* some tolerance */
+	ms += 30;               /* some tolerance */
+	ms = max(ms, 100);      /* minimum timeout */
 
 	val = msecs_to_jiffies(ms) + 10;
 	val = wait_for_completion_timeout(&sdd->xfer_completion, val);
@@ -555,9 +573,10 @@
 	return 0;
 }
 
-static void s3c64xx_spi_config(struct s3c64xx_spi_driver_data *sdd)
+static int s3c64xx_spi_config(struct s3c64xx_spi_driver_data *sdd)
 {
 	void __iomem *regs = sdd->regs;
+	int ret;
 	u32 val;
 
 	/* Disable Clock */
@@ -605,7 +624,10 @@
 
 	if (sdd->port_conf->clk_from_cmu) {
 		/* The src_clk clock is divided internally by 2 */
-		clk_set_rate(sdd->src_clk, sdd->cur_speed * 2);
+		ret = clk_set_rate(sdd->src_clk, sdd->cur_speed * 2);
+		if (ret)
+			return ret;
+		sdd->cur_speed = clk_get_rate(sdd->src_clk) / 2;
 	} else {
 		/* Configure Clock */
 		val = readl(regs + S3C64XX_SPI_CLK_CFG);
@@ -619,6 +641,8 @@
 		val |= S3C64XX_SPI_ENCLK_ENABLE;
 		writel(val, regs + S3C64XX_SPI_CLK_CFG);
 	}
+
+	return 0;
 }
 
 #define XFER_DMAADDR_INVALID DMA_BIT_MASK(32)
@@ -661,7 +685,9 @@
 		sdd->cur_bpw = bpw;
 		sdd->cur_speed = speed;
 		sdd->cur_mode = spi->mode;
-		s3c64xx_spi_config(sdd);
+		status = s3c64xx_spi_config(sdd);
+		if (status)
+			return status;
 	}
 
 	if (!is_polling(sdd) && (xfer->len > fifo_len) &&
@@ -685,13 +711,18 @@
 		sdd->state &= ~RXBUSY;
 		sdd->state &= ~TXBUSY;
 
-		s3c64xx_enable_datapath(sdd, xfer, use_dma);
-
 		/* Start the signals */
 		s3c64xx_spi_set_cs(spi, true);
 
+		status = s3c64xx_enable_datapath(sdd, xfer, use_dma);
+
 		spin_unlock_irqrestore(&sdd->lock, flags);
 
+		if (status) {
+			dev_err(&spi->dev, "failed to enable data path for transfer: %d\n", status);
+			break;
+		}
+
 		if (use_dma)
 			status = s3c64xx_wait_for_dma(sdd, xfer);
 		else
@@ -699,17 +730,28 @@
 
 		if (status) {
 			dev_err(&spi->dev,
-				"I/O Error: rx-%d tx-%d res:rx-%c tx-%c len-%d\n",
+				"I/O Error: rx-%d tx-%d rx-%c tx-%c len-%d dma-%d res-(%d)\n",
 				xfer->rx_buf ? 1 : 0, xfer->tx_buf ? 1 : 0,
 				(sdd->state & RXBUSY) ? 'f' : 'p',
 				(sdd->state & TXBUSY) ? 'f' : 'p',
-				xfer->len);
+				xfer->len, use_dma ? 1 : 0, status);
 
 			if (use_dma) {
-				if (xfer->tx_buf && (sdd->state & TXBUSY))
+				struct dma_tx_state s;
+
+				if (xfer->tx_buf && (sdd->state & TXBUSY)) {
+					dmaengine_pause(sdd->tx_dma.ch);
+					dmaengine_tx_status(sdd->tx_dma.ch, sdd->tx_dma.cookie, &s);
 					dmaengine_terminate_all(sdd->tx_dma.ch);
-				if (xfer->rx_buf && (sdd->state & RXBUSY))
+					dev_err(&spi->dev, "TX residue: %d\n", s.residue);
+
+				}
+				if (xfer->rx_buf && (sdd->state & RXBUSY)) {
+					dmaengine_pause(sdd->rx_dma.ch);
+					dmaengine_tx_status(sdd->rx_dma.ch, sdd->rx_dma.cookie, &s);
 					dmaengine_terminate_all(sdd->rx_dma.ch);
+					dev_err(&spi->dev, "RX residue: %d\n", s.residue);
+				}
 			}
 		} else {
 			s3c64xx_flush_fifo(sdd);
@@ -939,9 +981,9 @@
 	sdd->cur_speed = 0;
 
 	if (sci->no_cs)
-		writel(0, sdd->regs + S3C64XX_SPI_SLAVE_SEL);
+		writel(0, sdd->regs + S3C64XX_SPI_CS_REG);
 	else if (!(sdd->port_conf->quirks & S3C64XX_SPI_QUIRK_CS_AUTO))
-		writel(S3C64XX_SPI_SLAVE_SIG_INACT, sdd->regs + S3C64XX_SPI_SLAVE_SEL);
+		writel(S3C64XX_SPI_CS_SIG_INACT, sdd->regs + S3C64XX_SPI_CS_REG);
 
 	/* Disable Interrupts - we use Polling if not DMA mode */
 	writel(0, regs + S3C64XX_SPI_INT_EN);
@@ -1336,6 +1378,10 @@
 
 	s3c64xx_spi_hwinit(sdd);
 
+	writel(S3C64XX_SPI_INT_RX_OVERRUN_EN | S3C64XX_SPI_INT_RX_UNDERRUN_EN |
+	       S3C64XX_SPI_INT_TX_OVERRUN_EN | S3C64XX_SPI_INT_TX_UNDERRUN_EN,
+	       sdd->regs + S3C64XX_SPI_INT_EN);
+
 	return 0;
 
 err_disable_src_clk:
@@ -1379,6 +1425,7 @@
 	.tx_st_done	= 25,
 	.high_speed	= true,
 	.clk_from_cmu	= true,
+	.quirks		= S3C64XX_SPI_QUIRK_CS_AUTO,
 };
 
 static struct s3c64xx_spi_port_config exynos7_spi_port_config = {
diff --git a/drivers/spi/spi-sprd-adi.c b/drivers/spi/spi-sprd-adi.c
index 127b8bd..392ec5c 100644
--- a/drivers/spi/spi-sprd-adi.c
+++ b/drivers/spi/spi-sprd-adi.c
@@ -504,10 +504,7 @@
 			dev_info(&pdev->dev, "no hardware spinlock supplied\n");
 			break;
 		default:
-			dev_err(&pdev->dev,
-				"failed to find hwlock id, %d\n", ret);
-			fallthrough;
-		case -EPROBE_DEFER:
+			dev_err_probe(&pdev->dev, ret, "failed to find hwlock id\n");
 			goto put_ctlr;
 		}
 	}
diff --git a/drivers/spi/spi-sprd.c b/drivers/spi/spi-sprd.c
index 6678f1c..635738f 100644
--- a/drivers/spi/spi-sprd.c
+++ b/drivers/spi/spi-sprd.c
@@ -553,22 +553,15 @@
 static int sprd_spi_dma_request(struct sprd_spi *ss)
 {
 	ss->dma.dma_chan[SPRD_SPI_RX] = dma_request_chan(ss->dev, "rx_chn");
-	if (IS_ERR_OR_NULL(ss->dma.dma_chan[SPRD_SPI_RX])) {
-		if (PTR_ERR(ss->dma.dma_chan[SPRD_SPI_RX]) == -EPROBE_DEFER)
-			return PTR_ERR(ss->dma.dma_chan[SPRD_SPI_RX]);
-
-		dev_err(ss->dev, "request RX DMA channel failed!\n");
-		return PTR_ERR(ss->dma.dma_chan[SPRD_SPI_RX]);
-	}
+	if (IS_ERR_OR_NULL(ss->dma.dma_chan[SPRD_SPI_RX]))
+		return dev_err_probe(ss->dev, PTR_ERR(ss->dma.dma_chan[SPRD_SPI_RX]),
+				     "request RX DMA channel failed!\n");
 
 	ss->dma.dma_chan[SPRD_SPI_TX]  = dma_request_chan(ss->dev, "tx_chn");
 	if (IS_ERR_OR_NULL(ss->dma.dma_chan[SPRD_SPI_TX])) {
-		if (PTR_ERR(ss->dma.dma_chan[SPRD_SPI_TX]) == -EPROBE_DEFER)
-			return PTR_ERR(ss->dma.dma_chan[SPRD_SPI_TX]);
-
-		dev_err(ss->dev, "request TX DMA channel failed!\n");
 		dma_release_channel(ss->dma.dma_chan[SPRD_SPI_RX]);
-		return PTR_ERR(ss->dma.dma_chan[SPRD_SPI_TX]);
+		return dev_err_probe(ss->dev, PTR_ERR(ss->dma.dma_chan[SPRD_SPI_TX]),
+				     "request TX DMA channel failed!\n");
 	}
 
 	return 0;
diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index 3056428..2cc850e 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -804,10 +804,9 @@
 	struct spi_master *master = dev_id;
 	struct stm32_spi *spi = spi_master_get_devdata(master);
 	u32 sr, mask = 0;
-	unsigned long flags;
 	bool end = false;
 
-	spin_lock_irqsave(&spi->lock, flags);
+	spin_lock(&spi->lock);
 
 	sr = readl_relaxed(spi->base + STM32F4_SPI_SR);
 	/*
@@ -833,7 +832,7 @@
 
 	if (!(sr & mask)) {
 		dev_dbg(spi->dev, "spurious IT (sr=0x%08x)\n", sr);
-		spin_unlock_irqrestore(&spi->lock, flags);
+		spin_unlock(&spi->lock);
 		return IRQ_NONE;
 	}
 
@@ -875,11 +874,11 @@
 					STM32F4_SPI_CR2_TXEIE |
 					STM32F4_SPI_CR2_RXNEIE |
 					STM32F4_SPI_CR2_ERRIE);
-		spin_unlock_irqrestore(&spi->lock, flags);
+		spin_unlock(&spi->lock);
 		return IRQ_WAKE_THREAD;
 	}
 
-	spin_unlock_irqrestore(&spi->lock, flags);
+	spin_unlock(&spi->lock);
 	return IRQ_HANDLED;
 }
 
@@ -1861,9 +1860,7 @@
 
 	spi->irq = platform_get_irq(pdev, 0);
 	if (spi->irq <= 0) {
-		ret = spi->irq;
-		if (ret != -EPROBE_DEFER)
-			dev_err(&pdev->dev, "failed to get irq: %d\n", ret);
+		ret = dev_err_probe(&pdev->dev, spi->irq, "failed to get irq\n");
 		goto err_master_put;
 	}
 	ret = devm_request_threaded_irq(&pdev->dev, spi->irq,
diff --git a/drivers/spi/spi-synquacer.c b/drivers/spi/spi-synquacer.c
index ae17c99c..42e82db 100644
--- a/drivers/spi/spi-synquacer.c
+++ b/drivers/spi/spi-synquacer.c
@@ -640,9 +640,8 @@
 		}
 
 		if (IS_ERR(sspi->clk)) {
-			if (!(PTR_ERR(sspi->clk) == -EPROBE_DEFER))
-				dev_err(&pdev->dev, "clock not found\n");
-			ret = PTR_ERR(sspi->clk);
+			ret = dev_err_probe(&pdev->dev, PTR_ERR(sspi->clk),
+					    "clock not found\n");
 			goto put_spi;
 		}
 
diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c
index c2c5887..ca6886a 100644
--- a/drivers/spi/spi-tegra114.c
+++ b/drivers/spi/spi-tegra114.c
@@ -664,16 +664,11 @@
 	struct dma_chan *dma_chan;
 	u32 *dma_buf;
 	dma_addr_t dma_phys;
-	int ret;
 
 	dma_chan = dma_request_chan(tspi->dev, dma_to_memory ? "rx" : "tx");
-	if (IS_ERR(dma_chan)) {
-		ret = PTR_ERR(dma_chan);
-		if (ret != -EPROBE_DEFER)
-			dev_err(tspi->dev,
-				"Dma channel is not available: %d\n", ret);
-		return ret;
-	}
+	if (IS_ERR(dma_chan))
+		return dev_err_probe(tspi->dev, PTR_ERR(dma_chan),
+				     "Dma channel is not available\n");
 
 	dma_buf = dma_alloc_coherent(tspi->dev, tspi->dma_buf_size,
 				&dma_phys, GFP_KERNEL);
diff --git a/drivers/spi/spi-tegra20-sflash.c b/drivers/spi/spi-tegra20-sflash.c
index 02cf5f4..b59015c 100644
--- a/drivers/spi/spi-tegra20-sflash.c
+++ b/drivers/spi/spi-tegra20-sflash.c
@@ -359,9 +359,8 @@
 static irqreturn_t handle_cpu_based_xfer(struct tegra_sflash_data *tsd)
 {
 	struct spi_transfer *t = tsd->curr_xfer;
-	unsigned long flags;
 
-	spin_lock_irqsave(&tsd->lock, flags);
+	spin_lock(&tsd->lock);
 	if (tsd->tx_status || tsd->rx_status || (tsd->status_reg & SPI_BSY)) {
 		dev_err(tsd->dev,
 			"CpuXfer ERROR bit set 0x%x\n", tsd->status_reg);
@@ -391,7 +390,7 @@
 	tegra_sflash_calculate_curr_xfer_param(tsd->cur_spi, tsd, t);
 	tegra_sflash_start_cpu_based_transfer(tsd, t);
 exit:
-	spin_unlock_irqrestore(&tsd->lock, flags);
+	spin_unlock(&tsd->lock);
 	return IRQ_HANDLED;
 }
 
diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c
index a07b72e..a081076 100644
--- a/drivers/spi/spi-tegra20-slink.c
+++ b/drivers/spi/spi-tegra20-slink.c
@@ -600,13 +600,9 @@
 	struct dma_slave_config dma_sconfig;
 
 	dma_chan = dma_request_chan(tspi->dev, dma_to_memory ? "rx" : "tx");
-	if (IS_ERR(dma_chan)) {
-		ret = PTR_ERR(dma_chan);
-		if (ret != -EPROBE_DEFER)
-			dev_err(tspi->dev,
-				"Dma channel is not available: %d\n", ret);
-		return ret;
-	}
+	if (IS_ERR(dma_chan))
+		return dev_err_probe(tspi->dev, PTR_ERR(dma_chan),
+				     "Dma channel is not available\n");
 
 	dma_buf = dma_alloc_coherent(tspi->dev, tspi->dma_buf_size,
 				&dma_phys, GFP_KERNEL);
diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c
index 6df2aef..b459e36 100644
--- a/drivers/spi/spi-topcliff-pch.c
+++ b/drivers/spi/spi-topcliff-pch.c
@@ -1002,7 +1002,7 @@
 	spin_unlock_irqrestore(&data->lock, flags);
 
 	/* RX */
-	dma->sg_rx_p = kcalloc(num, sizeof(*dma->sg_rx_p), GFP_ATOMIC);
+	dma->sg_rx_p = kmalloc_array(num, sizeof(*dma->sg_rx_p), GFP_ATOMIC);
 	if (!dma->sg_rx_p)
 		return;
 
@@ -1065,7 +1065,7 @@
 		head = 0;
 	}
 
-	dma->sg_tx_p = kcalloc(num, sizeof(*dma->sg_tx_p), GFP_ATOMIC);
+	dma->sg_tx_p = kmalloc_array(num, sizeof(*dma->sg_tx_p), GFP_ATOMIC);
 	if (!dma->sg_tx_p)
 		return;
 
diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c
index 8dd2bb9..523edfd 100644
--- a/drivers/spi/spi-xilinx.c
+++ b/drivers/spi/spi-xilinx.c
@@ -491,8 +491,7 @@
 		goto put_master;
 	}
 
-	dev_info(&pdev->dev, "at 0x%08llX mapped to 0x%p, irq=%d\n",
-		(unsigned long long)res->start, xspi->regs, xspi->irq);
+	dev_info(&pdev->dev, "at %pR, irq=%d\n", res, xspi->irq);
 
 	if (pdata) {
 		for (i = 0; i < pdata->num_devices; i++)
diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c
index e17a201..c8fa6ee 100644
--- a/drivers/spi/spi-zynqmp-gqspi.c
+++ b/drivers/spi/spi-zynqmp-gqspi.c
@@ -21,6 +21,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
+#include <linux/spi/spi-mem.h>
 
 /* Generic QSPI register offsets */
 #define GQSPI_CONFIG_OFST		0x00000100
@@ -153,6 +154,7 @@
  * @dma_addr:		DMA address after mapping the kernel buffer
  * @genfifoentry:	Used for storing the genfifoentry instruction.
  * @mode:		Defines the mode in which QSPI is operating
+ * @data_completion:	completion structure
  */
 struct zynqmp_qspi {
 	void __iomem *regs;
@@ -170,12 +172,14 @@
 	dma_addr_t dma_addr;
 	u32 genfifoentry;
 	enum mode_type mode;
+	struct completion data_completion;
 };
 
 /**
- * zynqmp_gqspi_read:	For GQSPI controller read operation
+ * zynqmp_gqspi_read - For GQSPI controller read operation
  * @xqspi:	Pointer to the zynqmp_qspi structure
  * @offset:	Offset from where to read
+ * Return:      Value at the offset
  */
 static u32 zynqmp_gqspi_read(struct zynqmp_qspi *xqspi, u32 offset)
 {
@@ -183,7 +187,7 @@
 }
 
 /**
- * zynqmp_gqspi_write:	For GQSPI controller write operation
+ * zynqmp_gqspi_write - For GQSPI controller write operation
  * @xqspi:	Pointer to the zynqmp_qspi structure
  * @offset:	Offset where to write
  * @val:	Value to be written
@@ -195,7 +199,7 @@
 }
 
 /**
- * zynqmp_gqspi_selectslave:	For selection of slave device
+ * zynqmp_gqspi_selectslave - For selection of slave device
  * @instanceptr:	Pointer to the zynqmp_qspi structure
  * @slavecs:	For chip select
  * @slavebus:	To check which bus is selected- upper or lower
@@ -242,7 +246,7 @@
 }
 
 /**
- * zynqmp_qspi_init_hw:	Initialize the hardware
+ * zynqmp_qspi_init_hw - Initialize the hardware
  * @xqspi:	Pointer to the zynqmp_qspi structure
  *
  * The default settings of the QSPI controller's configurable parameters on
@@ -322,15 +326,15 @@
 				 GQSPI_SELECT_FLASH_BUS_LOWER);
 	/* Initialize DMA */
 	zynqmp_gqspi_write(xqspi,
-			GQSPI_QSPIDMA_DST_CTRL_OFST,
-			GQSPI_QSPIDMA_DST_CTRL_RESET_VAL);
+			   GQSPI_QSPIDMA_DST_CTRL_OFST,
+			   GQSPI_QSPIDMA_DST_CTRL_RESET_VAL);
 
 	/* Enable the GQSPI */
 	zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, GQSPI_EN_MASK);
 }
 
 /**
- * zynqmp_qspi_copy_read_data:	Copy data to RX buffer
+ * zynqmp_qspi_copy_read_data - Copy data to RX buffer
  * @xqspi:	Pointer to the zynqmp_qspi structure
  * @data:	The variable where data is stored
  * @size:	Number of bytes to be copied from data to RX buffer
@@ -344,41 +348,7 @@
 }
 
 /**
- * zynqmp_prepare_transfer_hardware:	Prepares hardware for transfer.
- * @master:	Pointer to the spi_master structure which provides
- *		information about the controller.
- *
- * This function enables SPI master controller.
- *
- * Return:	0 on success; error value otherwise
- */
-static int zynqmp_prepare_transfer_hardware(struct spi_master *master)
-{
-	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
-
-	zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, GQSPI_EN_MASK);
-	return 0;
-}
-
-/**
- * zynqmp_unprepare_transfer_hardware:	Relaxes hardware after transfer
- * @master:	Pointer to the spi_master structure which provides
- *		information about the controller.
- *
- * This function disables the SPI master controller.
- *
- * Return:	Always 0
- */
-static int zynqmp_unprepare_transfer_hardware(struct spi_master *master)
-{
-	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
-
-	zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, 0x0);
-	return 0;
-}
-
-/**
- * zynqmp_qspi_chipselect:	Select or deselect the chip select line
+ * zynqmp_qspi_chipselect - Select or deselect the chip select line
  * @qspi:	Pointer to the spi_device structure
  * @is_high:	Select(0) or deselect (1) the chip select line
  */
@@ -386,12 +356,14 @@
 {
 	struct zynqmp_qspi *xqspi = spi_master_get_devdata(qspi->master);
 	ulong timeout;
-	u32 genfifoentry = 0x0, statusreg;
+	u32 genfifoentry = 0, statusreg;
 
 	genfifoentry |= GQSPI_GENFIFO_MODE_SPI;
-	genfifoentry |= xqspi->genfifobus;
 
 	if (!is_high) {
+		xqspi->genfifobus = GQSPI_GENFIFO_BUS_LOWER;
+		xqspi->genfifocs = GQSPI_GENFIFO_CS_LOWER;
+		genfifoentry |= xqspi->genfifobus;
 		genfifoentry |= xqspi->genfifocs;
 		genfifoentry |= GQSPI_GENFIFO_CS_SETUP;
 	} else {
@@ -402,8 +374,8 @@
 
 	/* Manually start the generic FIFO command */
 	zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
-			zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST) |
-			GQSPI_CFG_START_GEN_FIFO_MASK);
+			   zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST) |
+			   GQSPI_CFG_START_GEN_FIFO_MASK);
 
 	timeout = jiffies + msecs_to_jiffies(1000);
 
@@ -412,10 +384,9 @@
 		statusreg = zynqmp_gqspi_read(xqspi, GQSPI_ISR_OFST);
 
 		if ((statusreg & GQSPI_ISR_GENFIFOEMPTY_MASK) &&
-			(statusreg & GQSPI_ISR_TXEMPTY_MASK))
+		    (statusreg & GQSPI_ISR_TXEMPTY_MASK))
 			break;
-		else
-			cpu_relax();
+		cpu_relax();
 	} while (!time_after_eq(jiffies, timeout));
 
 	if (time_after_eq(jiffies, timeout))
@@ -423,11 +394,38 @@
 }
 
 /**
- * zynqmp_qspi_setup_transfer:	Configure QSPI controller for specified
+ * zynqmp_qspi_selectspimode - Selects SPI mode - x1 or x2 or x4.
+ * @xqspi:	xqspi is a pointer to the GQSPI instance
+ * @spimode:	spimode - SPI or DUAL or QUAD.
+ * Return:	Mask to set desired SPI mode in GENFIFO entry.
+ */
+static inline u32 zynqmp_qspi_selectspimode(struct zynqmp_qspi *xqspi,
+					    u8 spimode)
+{
+	u32 mask = 0;
+
+	switch (spimode) {
+	case GQSPI_SELECT_MODE_DUALSPI:
+		mask = GQSPI_GENFIFO_MODE_DUALSPI;
+		break;
+	case GQSPI_SELECT_MODE_QUADSPI:
+		mask = GQSPI_GENFIFO_MODE_QUADSPI;
+		break;
+	case GQSPI_SELECT_MODE_SPI:
+		mask = GQSPI_GENFIFO_MODE_SPI;
+		break;
+	default:
+		dev_warn(xqspi->dev, "Invalid SPI mode\n");
+	}
+
+	return mask;
+}
+
+/**
+ * zynqmp_qspi_config_op - Configure QSPI controller for specified
  *				transfer
+ * @xqspi:	Pointer to the zynqmp_qspi structure
  * @qspi:	Pointer to the spi_device structure
- * @transfer:	Pointer to the spi_transfer structure which provides
- *		information about next transfer setup parameters
  *
  * Sets the operational mode of QSPI controller for the next QSPI transfer and
  * sets the requested clock frequency.
@@ -444,17 +442,11 @@
  *	by the QSPI controller the driver will set the highest or lowest
  *	frequency supported by controller.
  */
-static int zynqmp_qspi_setup_transfer(struct spi_device *qspi,
-				      struct spi_transfer *transfer)
+static int zynqmp_qspi_config_op(struct zynqmp_qspi *xqspi,
+				 struct spi_device *qspi)
 {
-	struct zynqmp_qspi *xqspi = spi_master_get_devdata(qspi->master);
 	ulong clk_rate;
-	u32 config_reg, req_hz, baud_rate_val = 0;
-
-	if (transfer)
-		req_hz = transfer->speed_hz;
-	else
-		req_hz = qspi->max_speed_hz;
+	u32 config_reg, baud_rate_val = 0;
 
 	/* Set the clock frequency */
 	/* If req_hz == 0, default to lowest speed */
@@ -462,7 +454,7 @@
 
 	while ((baud_rate_val < GQSPI_BAUD_DIV_MAX) &&
 	       (clk_rate /
-		(GQSPI_BAUD_DIV_SHIFT << baud_rate_val)) > req_hz)
+		(GQSPI_BAUD_DIV_SHIFT << baud_rate_val)) > qspi->max_speed_hz)
 		baud_rate_val++;
 
 	config_reg = zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST);
@@ -482,7 +474,7 @@
 }
 
 /**
- * zynqmp_qspi_setup:	Configure the QSPI controller
+ * zynqmp_qspi_setup_op - Configure the QSPI controller
  * @qspi:	Pointer to the spi_device structure
  *
  * Sets the operational mode of QSPI controller for the next QSPI transfer,
@@ -490,15 +482,35 @@
  *
  * Return:	0 on success; error value otherwise.
  */
-static int zynqmp_qspi_setup(struct spi_device *qspi)
+static int zynqmp_qspi_setup_op(struct spi_device *qspi)
 {
-	if (qspi->master->busy)
+	struct spi_controller *ctlr = qspi->master;
+	struct zynqmp_qspi *xqspi = spi_controller_get_devdata(ctlr);
+	struct device *dev = &ctlr->dev;
+	int ret;
+
+	if (ctlr->busy)
 		return -EBUSY;
+
+	ret = clk_enable(xqspi->refclk);
+	if (ret) {
+		dev_err(dev, "Cannot enable device clock.\n");
+		return ret;
+	}
+
+	ret = clk_enable(xqspi->pclk);
+	if (ret) {
+		dev_err(dev, "Cannot enable APB clock.\n");
+		clk_disable(xqspi->refclk);
+		return ret;
+	}
+	zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, GQSPI_EN_MASK);
+
 	return 0;
 }
 
 /**
- * zynqmp_qspi_filltxfifo:	Fills the TX FIFO as long as there is room in
+ * zynqmp_qspi_filltxfifo - Fills the TX FIFO as long as there is room in
  *				the FIFO or the bytes required to be
  *				transmitted.
  * @xqspi:	Pointer to the zynqmp_qspi structure
@@ -524,7 +536,7 @@
 }
 
 /**
- * zynqmp_qspi_readrxfifo:	Fills the RX FIFO as long as there is room in
+ * zynqmp_qspi_readrxfifo - Fills the RX FIFO as long as there is room in
  *				the FIFO.
  * @xqspi:	Pointer to the zynqmp_qspi structure
  * @size:	Number of bytes to be copied from RX buffer to RX FIFO
@@ -536,7 +548,7 @@
 
 	while ((count < size) && (xqspi->bytes_to_receive > 0)) {
 		if (xqspi->bytes_to_receive >= 4) {
-			(*(u32 *) xqspi->rxbuf) =
+			(*(u32 *)xqspi->rxbuf) =
 			zynqmp_gqspi_read(xqspi, GQSPI_RXD_OFST);
 			xqspi->rxbuf += 4;
 			xqspi->bytes_to_receive -= 4;
@@ -552,272 +564,33 @@
 }
 
 /**
- * zynqmp_process_dma_irq:	Handler for DMA done interrupt of QSPI
- *				controller
- * @xqspi:	zynqmp_qspi instance pointer
- *
- * This function handles DMA interrupt only.
+ * zynqmp_qspi_fillgenfifo - Fills the GENFIFO.
+ * @xqspi:	Pointer to the zynqmp_qspi structure
+ * @nbits:	Transfer/Receive buswidth.
+ * @genfifoentry:       Variable in which GENFIFO mask is saved
  */
-static void zynqmp_process_dma_irq(struct zynqmp_qspi *xqspi)
+static void zynqmp_qspi_fillgenfifo(struct zynqmp_qspi *xqspi, u8 nbits,
+				    u32 genfifoentry)
 {
-	u32 config_reg, genfifoentry;
+	u32 transfer_len = 0;
 
-	dma_unmap_single(xqspi->dev, xqspi->dma_addr,
-				xqspi->dma_rx_bytes, DMA_FROM_DEVICE);
-	xqspi->rxbuf += xqspi->dma_rx_bytes;
-	xqspi->bytes_to_receive -= xqspi->dma_rx_bytes;
-	xqspi->dma_rx_bytes = 0;
-
-	/* Disabling the DMA interrupts */
-	zynqmp_gqspi_write(xqspi, GQSPI_QSPIDMA_DST_I_DIS_OFST,
-					GQSPI_QSPIDMA_DST_I_EN_DONE_MASK);
-
-	if (xqspi->bytes_to_receive > 0) {
-		/* Switch to IO mode,for remaining bytes to receive */
-		config_reg = zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST);
-		config_reg &= ~GQSPI_CFG_MODE_EN_MASK;
-		zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST, config_reg);
-
-		/* Initiate the transfer of remaining bytes */
-		genfifoentry = xqspi->genfifoentry;
-		genfifoentry |= xqspi->bytes_to_receive;
-		zynqmp_gqspi_write(xqspi, GQSPI_GEN_FIFO_OFST, genfifoentry);
-
-		/* Dummy generic FIFO entry */
-		zynqmp_gqspi_write(xqspi, GQSPI_GEN_FIFO_OFST, 0x0);
-
-		/* Manual start */
-		zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
-			(zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST) |
-			GQSPI_CFG_START_GEN_FIFO_MASK));
-
-		/* Enable the RX interrupts for IO mode */
-		zynqmp_gqspi_write(xqspi, GQSPI_IER_OFST,
-				GQSPI_IER_GENFIFOEMPTY_MASK |
-				GQSPI_IER_RXNEMPTY_MASK |
-				GQSPI_IER_RXEMPTY_MASK);
+	if (xqspi->txbuf) {
+		genfifoentry &= ~GQSPI_GENFIFO_RX;
+		genfifoentry |= GQSPI_GENFIFO_DATA_XFER;
+		genfifoentry |= GQSPI_GENFIFO_TX;
+		transfer_len = xqspi->bytes_to_transfer;
+	} else {
+		genfifoentry &= ~GQSPI_GENFIFO_TX;
+		genfifoentry |= GQSPI_GENFIFO_DATA_XFER;
+		genfifoentry |= GQSPI_GENFIFO_RX;
+		if (xqspi->mode == GQSPI_MODE_DMA)
+			transfer_len = xqspi->dma_rx_bytes;
+		else
+			transfer_len = xqspi->bytes_to_receive;
 	}
-}
-
-/**
- * zynqmp_qspi_irq:	Interrupt service routine of the QSPI controller
- * @irq:	IRQ number
- * @dev_id:	Pointer to the xqspi structure
- *
- * This function handles TX empty only.
- * On TX empty interrupt this function reads the received data from RX FIFO
- * and fills the TX FIFO if there is any data remaining to be transferred.
- *
- * Return:	IRQ_HANDLED when interrupt is handled
- *		IRQ_NONE otherwise.
- */
-static irqreturn_t zynqmp_qspi_irq(int irq, void *dev_id)
-{
-	struct spi_master *master = dev_id;
-	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
-	int ret = IRQ_NONE;
-	u32 status, mask, dma_status = 0;
-
-	status = zynqmp_gqspi_read(xqspi, GQSPI_ISR_OFST);
-	zynqmp_gqspi_write(xqspi, GQSPI_ISR_OFST, status);
-	mask = (status & ~(zynqmp_gqspi_read(xqspi, GQSPI_IMASK_OFST)));
-
-	/* Read and clear DMA status */
-	if (xqspi->mode == GQSPI_MODE_DMA) {
-		dma_status =
-			zynqmp_gqspi_read(xqspi, GQSPI_QSPIDMA_DST_I_STS_OFST);
-		zynqmp_gqspi_write(xqspi, GQSPI_QSPIDMA_DST_I_STS_OFST,
-								dma_status);
-	}
-
-	if (mask & GQSPI_ISR_TXNOT_FULL_MASK) {
-		zynqmp_qspi_filltxfifo(xqspi, GQSPI_TX_FIFO_FILL);
-		ret = IRQ_HANDLED;
-	}
-
-	if (dma_status & GQSPI_QSPIDMA_DST_I_STS_DONE_MASK) {
-		zynqmp_process_dma_irq(xqspi);
-		ret = IRQ_HANDLED;
-	} else if (!(mask & GQSPI_IER_RXEMPTY_MASK) &&
-			(mask & GQSPI_IER_GENFIFOEMPTY_MASK)) {
-		zynqmp_qspi_readrxfifo(xqspi, GQSPI_RX_FIFO_FILL);
-		ret = IRQ_HANDLED;
-	}
-
-	if ((xqspi->bytes_to_receive == 0) && (xqspi->bytes_to_transfer == 0)
-			&& ((status & GQSPI_IRQ_MASK) == GQSPI_IRQ_MASK)) {
-		zynqmp_gqspi_write(xqspi, GQSPI_IDR_OFST, GQSPI_ISR_IDR_MASK);
-		spi_finalize_current_transfer(master);
-		ret = IRQ_HANDLED;
-	}
-	return ret;
-}
-
-/**
- * zynqmp_qspi_selectspimode:	Selects SPI mode - x1 or x2 or x4.
- * @xqspi:	xqspi is a pointer to the GQSPI instance
- * @spimode:	spimode - SPI or DUAL or QUAD.
- * Return:	Mask to set desired SPI mode in GENFIFO entry.
- */
-static inline u32 zynqmp_qspi_selectspimode(struct zynqmp_qspi *xqspi,
-						u8 spimode)
-{
-	u32 mask = 0;
-
-	switch (spimode) {
-	case GQSPI_SELECT_MODE_DUALSPI:
-		mask = GQSPI_GENFIFO_MODE_DUALSPI;
-		break;
-	case GQSPI_SELECT_MODE_QUADSPI:
-		mask = GQSPI_GENFIFO_MODE_QUADSPI;
-		break;
-	case GQSPI_SELECT_MODE_SPI:
-		mask = GQSPI_GENFIFO_MODE_SPI;
-		break;
-	default:
-		dev_warn(xqspi->dev, "Invalid SPI mode\n");
-	}
-
-	return mask;
-}
-
-/**
- * zynq_qspi_setuprxdma:	This function sets up the RX DMA operation
- * @xqspi:	xqspi is a pointer to the GQSPI instance.
- */
-static void zynq_qspi_setuprxdma(struct zynqmp_qspi *xqspi)
-{
-	u32 rx_bytes, rx_rem, config_reg;
-	dma_addr_t addr;
-	u64 dma_align =  (u64)(uintptr_t)xqspi->rxbuf;
-
-	if ((xqspi->bytes_to_receive < 8) ||
-		((dma_align & GQSPI_DMA_UNALIGN) != 0x0)) {
-		/* Setting to IO mode */
-		config_reg = zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST);
-		config_reg &= ~GQSPI_CFG_MODE_EN_MASK;
-		zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST, config_reg);
-		xqspi->mode = GQSPI_MODE_IO;
-		xqspi->dma_rx_bytes = 0;
-		return;
-	}
-
-	rx_rem = xqspi->bytes_to_receive % 4;
-	rx_bytes = (xqspi->bytes_to_receive - rx_rem);
-
-	addr = dma_map_single(xqspi->dev, (void *)xqspi->rxbuf,
-						rx_bytes, DMA_FROM_DEVICE);
-	if (dma_mapping_error(xqspi->dev, addr))
-		dev_err(xqspi->dev, "ERR:rxdma:memory not mapped\n");
-
-	xqspi->dma_rx_bytes = rx_bytes;
-	xqspi->dma_addr = addr;
-	zynqmp_gqspi_write(xqspi, GQSPI_QSPIDMA_DST_ADDR_OFST,
-				(u32)(addr & 0xffffffff));
-	addr = ((addr >> 16) >> 16);
-	zynqmp_gqspi_write(xqspi, GQSPI_QSPIDMA_DST_ADDR_MSB_OFST,
-				((u32)addr) & 0xfff);
-
-	/* Enabling the DMA mode */
-	config_reg = zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST);
-	config_reg &= ~GQSPI_CFG_MODE_EN_MASK;
-	config_reg |= GQSPI_CFG_MODE_EN_DMA_MASK;
-	zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST, config_reg);
-
-	/* Switch to DMA mode */
-	xqspi->mode = GQSPI_MODE_DMA;
-
-	/* Write the number of bytes to transfer */
-	zynqmp_gqspi_write(xqspi, GQSPI_QSPIDMA_DST_SIZE_OFST, rx_bytes);
-}
-
-/**
- * zynqmp_qspi_txrxsetup:	This function checks the TX/RX buffers in
- *				the transfer and sets up the GENFIFO entries,
- *				TX FIFO as required.
- * @xqspi:	xqspi is a pointer to the GQSPI instance.
- * @transfer:	It is a pointer to the structure containing transfer data.
- * @genfifoentry:	genfifoentry is pointer to the variable in which
- *			GENFIFO	mask is returned to calling function
- */
-static void zynqmp_qspi_txrxsetup(struct zynqmp_qspi *xqspi,
-				  struct spi_transfer *transfer,
-				  u32 *genfifoentry)
-{
-	u32 config_reg;
-
-	/* Transmit */
-	if ((xqspi->txbuf != NULL) && (xqspi->rxbuf == NULL)) {
-		/* Setup data to be TXed */
-		*genfifoentry &= ~GQSPI_GENFIFO_RX;
-		*genfifoentry |= GQSPI_GENFIFO_DATA_XFER;
-		*genfifoentry |= GQSPI_GENFIFO_TX;
-		*genfifoentry |=
-			zynqmp_qspi_selectspimode(xqspi, transfer->tx_nbits);
-		xqspi->bytes_to_transfer = transfer->len;
-		if (xqspi->mode == GQSPI_MODE_DMA) {
-			config_reg = zynqmp_gqspi_read(xqspi,
-							GQSPI_CONFIG_OFST);
-			config_reg &= ~GQSPI_CFG_MODE_EN_MASK;
-			zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
-								config_reg);
-			xqspi->mode = GQSPI_MODE_IO;
-		}
-		zynqmp_qspi_filltxfifo(xqspi, GQSPI_TXD_DEPTH);
-		/* Discard RX data */
-		xqspi->bytes_to_receive = 0;
-	} else if ((xqspi->txbuf == NULL) && (xqspi->rxbuf != NULL)) {
-		/* Receive */
-
-		/* TX auto fill */
-		*genfifoentry &= ~GQSPI_GENFIFO_TX;
-		/* Setup RX */
-		*genfifoentry |= GQSPI_GENFIFO_DATA_XFER;
-		*genfifoentry |= GQSPI_GENFIFO_RX;
-		*genfifoentry |=
-			zynqmp_qspi_selectspimode(xqspi, transfer->rx_nbits);
-		xqspi->bytes_to_transfer = 0;
-		xqspi->bytes_to_receive = transfer->len;
-		zynq_qspi_setuprxdma(xqspi);
-	}
-}
-
-/**
- * zynqmp_qspi_start_transfer:	Initiates the QSPI transfer
- * @master:	Pointer to the spi_master structure which provides
- *		information about the controller.
- * @qspi:	Pointer to the spi_device structure
- * @transfer:	Pointer to the spi_transfer structure which provide information
- *		about next transfer parameters
- *
- * This function fills the TX FIFO, starts the QSPI transfer, and waits for the
- * transfer to be completed.
- *
- * Return:	Number of bytes transferred in the last transfer
- */
-static int zynqmp_qspi_start_transfer(struct spi_master *master,
-				      struct spi_device *qspi,
-				      struct spi_transfer *transfer)
-{
-	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
-	u32 genfifoentry = 0x0, transfer_len;
-
-	xqspi->txbuf = transfer->tx_buf;
-	xqspi->rxbuf = transfer->rx_buf;
-
-	zynqmp_qspi_setup_transfer(qspi, transfer);
-
-	genfifoentry |= xqspi->genfifocs;
-	genfifoentry |= xqspi->genfifobus;
-
-	zynqmp_qspi_txrxsetup(xqspi, transfer, &genfifoentry);
-
-	if (xqspi->mode == GQSPI_MODE_DMA)
-		transfer_len = xqspi->dma_rx_bytes;
-	else
-		transfer_len = transfer->len;
-
+	genfifoentry |= zynqmp_qspi_selectspimode(xqspi, nbits);
 	xqspi->genfifoentry = genfifoentry;
+
 	if ((transfer_len) < GQSPI_GENFIFO_IMM_DATA_MASK) {
 		genfifoentry &= ~GQSPI_GENFIFO_IMM_DATA_MASK;
 		genfifoentry |= transfer_len;
@@ -835,7 +608,7 @@
 			while (tempcount != 0) {
 				if (tempcount & GQSPI_GENFIFO_EXP_START) {
 					genfifoentry &=
-					    ~GQSPI_GENFIFO_IMM_DATA_MASK;
+						~GQSPI_GENFIFO_IMM_DATA_MASK;
 					genfifoentry |= exponent;
 					zynqmp_gqspi_write(xqspi,
 							   GQSPI_GEN_FIFO_OFST,
@@ -848,50 +621,212 @@
 		if (imm_data != 0) {
 			genfifoentry &= ~GQSPI_GENFIFO_EXP;
 			genfifoentry &= ~GQSPI_GENFIFO_IMM_DATA_MASK;
-			genfifoentry |= (u8) (imm_data & 0xFF);
-			zynqmp_gqspi_write(xqspi,
-					   GQSPI_GEN_FIFO_OFST, genfifoentry);
+			genfifoentry |= (u8)(imm_data & 0xFF);
+			zynqmp_gqspi_write(xqspi, GQSPI_GEN_FIFO_OFST,
+					   genfifoentry);
 		}
 	}
-
-	if ((xqspi->mode == GQSPI_MODE_IO) &&
-			(xqspi->rxbuf != NULL)) {
+	if (xqspi->mode == GQSPI_MODE_IO && xqspi->rxbuf) {
 		/* Dummy generic FIFO entry */
 		zynqmp_gqspi_write(xqspi, GQSPI_GEN_FIFO_OFST, 0x0);
 	}
-
-	/* Since we are using manual mode */
-	zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
-			   zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST) |
-			   GQSPI_CFG_START_GEN_FIFO_MASK);
-
-	if (xqspi->txbuf != NULL)
-		/* Enable interrupts for TX */
-		zynqmp_gqspi_write(xqspi, GQSPI_IER_OFST,
-				   GQSPI_IER_TXEMPTY_MASK |
-					GQSPI_IER_GENFIFOEMPTY_MASK |
-					GQSPI_IER_TXNOT_FULL_MASK);
-
-	if (xqspi->rxbuf != NULL) {
-		/* Enable interrupts for RX */
-		if (xqspi->mode == GQSPI_MODE_DMA) {
-			/* Enable DMA interrupts */
-			zynqmp_gqspi_write(xqspi,
-					GQSPI_QSPIDMA_DST_I_EN_OFST,
-					GQSPI_QSPIDMA_DST_I_EN_DONE_MASK);
-		} else {
-			zynqmp_gqspi_write(xqspi, GQSPI_IER_OFST,
-					GQSPI_IER_GENFIFOEMPTY_MASK |
-					GQSPI_IER_RXNEMPTY_MASK |
-					GQSPI_IER_RXEMPTY_MASK);
-		}
-	}
-
-	return transfer->len;
 }
 
 /**
- * zynqmp_qspi_suspend:	Suspend method for the QSPI driver
+ * zynqmp_process_dma_irq - Handler for DMA done interrupt of QSPI
+ *				controller
+ * @xqspi:	zynqmp_qspi instance pointer
+ *
+ * This function handles DMA interrupt only.
+ */
+static void zynqmp_process_dma_irq(struct zynqmp_qspi *xqspi)
+{
+	u32 config_reg, genfifoentry;
+
+	dma_unmap_single(xqspi->dev, xqspi->dma_addr,
+			 xqspi->dma_rx_bytes, DMA_FROM_DEVICE);
+	xqspi->rxbuf += xqspi->dma_rx_bytes;
+	xqspi->bytes_to_receive -= xqspi->dma_rx_bytes;
+	xqspi->dma_rx_bytes = 0;
+
+	/* Disabling the DMA interrupts */
+	zynqmp_gqspi_write(xqspi, GQSPI_QSPIDMA_DST_I_DIS_OFST,
+			   GQSPI_QSPIDMA_DST_I_EN_DONE_MASK);
+
+	if (xqspi->bytes_to_receive > 0) {
+		/* Switch to IO mode,for remaining bytes to receive */
+		config_reg = zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST);
+		config_reg &= ~GQSPI_CFG_MODE_EN_MASK;
+		zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST, config_reg);
+
+		/* Initiate the transfer of remaining bytes */
+		genfifoentry = xqspi->genfifoentry;
+		genfifoentry |= xqspi->bytes_to_receive;
+		zynqmp_gqspi_write(xqspi, GQSPI_GEN_FIFO_OFST, genfifoentry);
+
+		/* Dummy generic FIFO entry */
+		zynqmp_gqspi_write(xqspi, GQSPI_GEN_FIFO_OFST, 0x0);
+
+		/* Manual start */
+		zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
+				   (zynqmp_gqspi_read(xqspi,
+						      GQSPI_CONFIG_OFST) |
+				   GQSPI_CFG_START_GEN_FIFO_MASK));
+
+		/* Enable the RX interrupts for IO mode */
+		zynqmp_gqspi_write(xqspi, GQSPI_IER_OFST,
+				   GQSPI_IER_GENFIFOEMPTY_MASK |
+				   GQSPI_IER_RXNEMPTY_MASK |
+				   GQSPI_IER_RXEMPTY_MASK);
+	}
+}
+
+/**
+ * zynqmp_qspi_irq - Interrupt service routine of the QSPI controller
+ * @irq:	IRQ number
+ * @dev_id:	Pointer to the xqspi structure
+ *
+ * This function handles TX empty only.
+ * On TX empty interrupt this function reads the received data from RX FIFO
+ * and fills the TX FIFO if there is any data remaining to be transferred.
+ *
+ * Return:	IRQ_HANDLED when interrupt is handled
+ *		IRQ_NONE otherwise.
+ */
+static irqreturn_t zynqmp_qspi_irq(int irq, void *dev_id)
+{
+	struct zynqmp_qspi *xqspi = (struct zynqmp_qspi *)dev_id;
+	irqreturn_t ret = IRQ_NONE;
+	u32 status, mask, dma_status = 0;
+
+	status = zynqmp_gqspi_read(xqspi, GQSPI_ISR_OFST);
+	zynqmp_gqspi_write(xqspi, GQSPI_ISR_OFST, status);
+	mask = (status & ~(zynqmp_gqspi_read(xqspi, GQSPI_IMASK_OFST)));
+
+	/* Read and clear DMA status */
+	if (xqspi->mode == GQSPI_MODE_DMA) {
+		dma_status =
+			zynqmp_gqspi_read(xqspi, GQSPI_QSPIDMA_DST_I_STS_OFST);
+		zynqmp_gqspi_write(xqspi, GQSPI_QSPIDMA_DST_I_STS_OFST,
+				   dma_status);
+	}
+
+	if (mask & GQSPI_ISR_TXNOT_FULL_MASK) {
+		zynqmp_qspi_filltxfifo(xqspi, GQSPI_TX_FIFO_FILL);
+		ret = IRQ_HANDLED;
+	}
+
+	if (dma_status & GQSPI_QSPIDMA_DST_I_STS_DONE_MASK) {
+		zynqmp_process_dma_irq(xqspi);
+		ret = IRQ_HANDLED;
+	} else if (!(mask & GQSPI_IER_RXEMPTY_MASK) &&
+			(mask & GQSPI_IER_GENFIFOEMPTY_MASK)) {
+		zynqmp_qspi_readrxfifo(xqspi, GQSPI_RX_FIFO_FILL);
+		ret = IRQ_HANDLED;
+	}
+
+	if (xqspi->bytes_to_receive == 0 && xqspi->bytes_to_transfer == 0 &&
+	    ((status & GQSPI_IRQ_MASK) == GQSPI_IRQ_MASK)) {
+		zynqmp_gqspi_write(xqspi, GQSPI_IDR_OFST, GQSPI_ISR_IDR_MASK);
+		complete(&xqspi->data_completion);
+		ret = IRQ_HANDLED;
+	}
+	return ret;
+}
+
+/**
+ * zynqmp_qspi_setuprxdma - This function sets up the RX DMA operation
+ * @xqspi:	xqspi is a pointer to the GQSPI instance.
+ */
+static void zynqmp_qspi_setuprxdma(struct zynqmp_qspi *xqspi)
+{
+	u32 rx_bytes, rx_rem, config_reg;
+	dma_addr_t addr;
+	u64 dma_align =  (u64)(uintptr_t)xqspi->rxbuf;
+
+	if (xqspi->bytes_to_receive < 8 ||
+	    ((dma_align & GQSPI_DMA_UNALIGN) != 0x0)) {
+		/* Setting to IO mode */
+		config_reg = zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST);
+		config_reg &= ~GQSPI_CFG_MODE_EN_MASK;
+		zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST, config_reg);
+		xqspi->mode = GQSPI_MODE_IO;
+		xqspi->dma_rx_bytes = 0;
+		return;
+	}
+
+	rx_rem = xqspi->bytes_to_receive % 4;
+	rx_bytes = (xqspi->bytes_to_receive - rx_rem);
+
+	addr = dma_map_single(xqspi->dev, (void *)xqspi->rxbuf,
+			      rx_bytes, DMA_FROM_DEVICE);
+	if (dma_mapping_error(xqspi->dev, addr))
+		dev_err(xqspi->dev, "ERR:rxdma:memory not mapped\n");
+
+	xqspi->dma_rx_bytes = rx_bytes;
+	xqspi->dma_addr = addr;
+	zynqmp_gqspi_write(xqspi, GQSPI_QSPIDMA_DST_ADDR_OFST,
+			   (u32)(addr & 0xffffffff));
+	addr = ((addr >> 16) >> 16);
+	zynqmp_gqspi_write(xqspi, GQSPI_QSPIDMA_DST_ADDR_MSB_OFST,
+			   ((u32)addr) & 0xfff);
+
+	/* Enabling the DMA mode */
+	config_reg = zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST);
+	config_reg &= ~GQSPI_CFG_MODE_EN_MASK;
+	config_reg |= GQSPI_CFG_MODE_EN_DMA_MASK;
+	zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST, config_reg);
+
+	/* Switch to DMA mode */
+	xqspi->mode = GQSPI_MODE_DMA;
+
+	/* Write the number of bytes to transfer */
+	zynqmp_gqspi_write(xqspi, GQSPI_QSPIDMA_DST_SIZE_OFST, rx_bytes);
+}
+
+/**
+ * zynqmp_qspi_write_op - This function sets up the GENFIFO entries,
+ *			TX FIFO, and fills the TX FIFO with as many
+ *			bytes as possible.
+ * @xqspi:	Pointer to the GQSPI instance.
+ * @tx_nbits:	Transfer buswidth.
+ * @genfifoentry:	Variable in which GENFIFO mask is returned
+ *			to calling function
+ */
+static void zynqmp_qspi_write_op(struct zynqmp_qspi *xqspi, u8 tx_nbits,
+				 u32 genfifoentry)
+{
+	u32 config_reg;
+
+	zynqmp_qspi_fillgenfifo(xqspi, tx_nbits, genfifoentry);
+	zynqmp_qspi_filltxfifo(xqspi, GQSPI_TXD_DEPTH);
+	if (xqspi->mode == GQSPI_MODE_DMA) {
+		config_reg = zynqmp_gqspi_read(xqspi,
+					       GQSPI_CONFIG_OFST);
+		config_reg &= ~GQSPI_CFG_MODE_EN_MASK;
+		zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
+				   config_reg);
+		xqspi->mode = GQSPI_MODE_IO;
+	}
+}
+
+/**
+ * zynqmp_qspi_read_op - This function sets up the GENFIFO entries and
+ *				RX DMA operation.
+ * @xqspi:	xqspi is a pointer to the GQSPI instance.
+ * @rx_nbits:	Receive buswidth.
+ * @genfifoentry:	genfifoentry is pointer to the variable in which
+ *			GENFIFO	mask is returned to calling function
+ */
+static void zynqmp_qspi_read_op(struct zynqmp_qspi *xqspi, u8 rx_nbits,
+				u32 genfifoentry)
+{
+	zynqmp_qspi_fillgenfifo(xqspi, rx_nbits, genfifoentry);
+	zynqmp_qspi_setuprxdma(xqspi);
+}
+
+/**
+ * zynqmp_qspi_suspend - Suspend method for the QSPI driver
  * @dev:	Address of the platform_device structure
  *
  * This function stops the QSPI driver queue and disables the QSPI controller
@@ -900,17 +835,18 @@
  */
 static int __maybe_unused zynqmp_qspi_suspend(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct zynqmp_qspi *xqspi = spi_controller_get_devdata(ctlr);
 
-	spi_master_suspend(master);
+	spi_controller_suspend(ctlr);
 
-	zynqmp_unprepare_transfer_hardware(master);
+	zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, 0x0);
 
 	return 0;
 }
 
 /**
- * zynqmp_qspi_resume:	Resume method for the QSPI driver
+ * zynqmp_qspi_resume - Resume method for the QSPI driver
  * @dev:	Address of the platform_device structure
  *
  * The function starts the QSPI driver queue and initializes the QSPI
@@ -920,8 +856,8 @@
  */
 static int __maybe_unused zynqmp_qspi_resume(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct zynqmp_qspi *xqspi = spi_controller_get_devdata(ctlr);
 	int ret = 0;
 
 	ret = clk_enable(xqspi->pclk);
@@ -937,7 +873,7 @@
 		return ret;
 	}
 
-	spi_master_resume(master);
+	spi_controller_resume(ctlr);
 
 	clk_disable(xqspi->refclk);
 	clk_disable(xqspi->pclk);
@@ -954,8 +890,7 @@
  */
 static int __maybe_unused zynqmp_runtime_suspend(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
+	struct zynqmp_qspi *xqspi = (struct zynqmp_qspi *)dev_get_drvdata(dev);
 
 	clk_disable(xqspi->refclk);
 	clk_disable(xqspi->pclk);
@@ -973,8 +908,7 @@
  */
 static int __maybe_unused zynqmp_runtime_resume(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
+	struct zynqmp_qspi *xqspi = (struct zynqmp_qspi *)dev_get_drvdata(dev);
 	int ret;
 
 	ret = clk_enable(xqspi->pclk);
@@ -993,14 +927,179 @@
 	return 0;
 }
 
+/**
+ * zynqmp_qspi_exec_op() - Initiates the QSPI transfer
+ * @mem: The SPI memory
+ * @op: The memory operation to execute
+ *
+ * Executes a memory operation.
+ *
+ * This function first selects the chip and starts the memory operation.
+ *
+ * Return: 0 in case of success, a negative error code otherwise.
+ */
+static int zynqmp_qspi_exec_op(struct spi_mem *mem,
+			       const struct spi_mem_op *op)
+{
+	struct zynqmp_qspi *xqspi = spi_controller_get_devdata
+				    (mem->spi->master);
+	int err = 0, i;
+	u8 *tmpbuf;
+	u32 genfifoentry = 0;
+
+	dev_dbg(xqspi->dev, "cmd:%#x mode:%d.%d.%d.%d\n",
+		op->cmd.opcode, op->cmd.buswidth, op->addr.buswidth,
+		op->dummy.buswidth, op->data.buswidth);
+
+	zynqmp_qspi_config_op(xqspi, mem->spi);
+	zynqmp_qspi_chipselect(mem->spi, false);
+	genfifoentry |= xqspi->genfifocs;
+	genfifoentry |= xqspi->genfifobus;
+
+	if (op->cmd.opcode) {
+		tmpbuf = kzalloc(op->cmd.nbytes, GFP_KERNEL | GFP_DMA);
+		if (!tmpbuf)
+			return -ENOMEM;
+		tmpbuf[0] = op->cmd.opcode;
+		reinit_completion(&xqspi->data_completion);
+		xqspi->txbuf = tmpbuf;
+		xqspi->rxbuf = NULL;
+		xqspi->bytes_to_transfer = op->cmd.nbytes;
+		xqspi->bytes_to_receive = 0;
+		zynqmp_qspi_write_op(xqspi, op->cmd.buswidth, genfifoentry);
+		zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
+				   zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST) |
+				   GQSPI_CFG_START_GEN_FIFO_MASK);
+		zynqmp_gqspi_write(xqspi, GQSPI_IER_OFST,
+				   GQSPI_IER_GENFIFOEMPTY_MASK |
+				   GQSPI_IER_TXNOT_FULL_MASK);
+		if (!wait_for_completion_interruptible_timeout
+		    (&xqspi->data_completion, msecs_to_jiffies(1000))) {
+			err = -ETIMEDOUT;
+			kfree(tmpbuf);
+			goto return_err;
+		}
+		kfree(tmpbuf);
+	}
+
+	if (op->addr.nbytes) {
+		for (i = 0; i < op->addr.nbytes; i++) {
+			*(((u8 *)xqspi->txbuf) + i) = op->addr.val >>
+					(8 * (op->addr.nbytes - i - 1));
+		}
+
+		reinit_completion(&xqspi->data_completion);
+		xqspi->rxbuf = NULL;
+		xqspi->bytes_to_transfer = op->addr.nbytes;
+		xqspi->bytes_to_receive = 0;
+		zynqmp_qspi_write_op(xqspi, op->addr.buswidth, genfifoentry);
+		zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
+				   zynqmp_gqspi_read(xqspi,
+						     GQSPI_CONFIG_OFST) |
+				   GQSPI_CFG_START_GEN_FIFO_MASK);
+		zynqmp_gqspi_write(xqspi, GQSPI_IER_OFST,
+				   GQSPI_IER_TXEMPTY_MASK |
+				   GQSPI_IER_GENFIFOEMPTY_MASK |
+				   GQSPI_IER_TXNOT_FULL_MASK);
+		if (!wait_for_completion_interruptible_timeout
+		    (&xqspi->data_completion, msecs_to_jiffies(1000))) {
+			err = -ETIMEDOUT;
+			goto return_err;
+		}
+	}
+
+	if (op->dummy.nbytes) {
+		tmpbuf = kzalloc(op->dummy.nbytes, GFP_KERNEL | GFP_DMA);
+		if (!tmpbuf)
+			return -ENOMEM;
+		memset(tmpbuf, 0xff, op->dummy.nbytes);
+		reinit_completion(&xqspi->data_completion);
+		xqspi->txbuf = tmpbuf;
+		xqspi->rxbuf = NULL;
+		xqspi->bytes_to_transfer = op->dummy.nbytes;
+		xqspi->bytes_to_receive = 0;
+		zynqmp_qspi_write_op(xqspi, op->dummy.buswidth,
+				     genfifoentry);
+		zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
+				   zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST) |
+				   GQSPI_CFG_START_GEN_FIFO_MASK);
+		zynqmp_gqspi_write(xqspi, GQSPI_IER_OFST,
+				   GQSPI_IER_TXEMPTY_MASK |
+				   GQSPI_IER_GENFIFOEMPTY_MASK |
+				   GQSPI_IER_TXNOT_FULL_MASK);
+		if (!wait_for_completion_interruptible_timeout
+		    (&xqspi->data_completion, msecs_to_jiffies(1000))) {
+			err = -ETIMEDOUT;
+			kfree(tmpbuf);
+			goto return_err;
+		}
+
+		kfree(tmpbuf);
+	}
+
+	if (op->data.nbytes) {
+		reinit_completion(&xqspi->data_completion);
+		if (op->data.dir == SPI_MEM_DATA_OUT) {
+			xqspi->txbuf = (u8 *)op->data.buf.out;
+			xqspi->rxbuf = NULL;
+			xqspi->bytes_to_transfer = op->data.nbytes;
+			xqspi->bytes_to_receive = 0;
+			zynqmp_qspi_write_op(xqspi, op->data.buswidth,
+					     genfifoentry);
+			zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
+					   zynqmp_gqspi_read
+					   (xqspi, GQSPI_CONFIG_OFST) |
+					   GQSPI_CFG_START_GEN_FIFO_MASK);
+			zynqmp_gqspi_write(xqspi, GQSPI_IER_OFST,
+					   GQSPI_IER_TXEMPTY_MASK |
+					   GQSPI_IER_GENFIFOEMPTY_MASK |
+					   GQSPI_IER_TXNOT_FULL_MASK);
+		} else {
+			xqspi->txbuf = NULL;
+			xqspi->rxbuf = (u8 *)op->data.buf.in;
+			xqspi->bytes_to_receive = op->data.nbytes;
+			xqspi->bytes_to_transfer = 0;
+			zynqmp_qspi_read_op(xqspi, op->data.buswidth,
+					    genfifoentry);
+			zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
+					   zynqmp_gqspi_read
+					   (xqspi, GQSPI_CONFIG_OFST) |
+					   GQSPI_CFG_START_GEN_FIFO_MASK);
+			if (xqspi->mode == GQSPI_MODE_DMA) {
+				zynqmp_gqspi_write
+					(xqspi, GQSPI_QSPIDMA_DST_I_EN_OFST,
+					 GQSPI_QSPIDMA_DST_I_EN_DONE_MASK);
+			} else {
+				zynqmp_gqspi_write(xqspi, GQSPI_IER_OFST,
+						   GQSPI_IER_GENFIFOEMPTY_MASK |
+						   GQSPI_IER_RXNEMPTY_MASK |
+						   GQSPI_IER_RXEMPTY_MASK);
+			}
+		}
+		if (!wait_for_completion_interruptible_timeout
+		    (&xqspi->data_completion, msecs_to_jiffies(1000)))
+			err = -ETIMEDOUT;
+	}
+
+return_err:
+
+	zynqmp_qspi_chipselect(mem->spi, true);
+
+	return err;
+}
+
 static const struct dev_pm_ops zynqmp_qspi_dev_pm_ops = {
 	SET_RUNTIME_PM_OPS(zynqmp_runtime_suspend,
 			   zynqmp_runtime_resume, NULL)
 	SET_SYSTEM_SLEEP_PM_OPS(zynqmp_qspi_suspend, zynqmp_qspi_resume)
 };
 
+static const struct spi_controller_mem_ops zynqmp_qspi_mem_ops = {
+	.exec_op = zynqmp_qspi_exec_op,
+};
+
 /**
- * zynqmp_qspi_probe:	Probe method for the QSPI driver
+ * zynqmp_qspi_probe - Probe method for the QSPI driver
  * @pdev:	Pointer to the platform_device structure
  *
  * This function initializes the driver data structures and the hardware.
@@ -1010,17 +1109,18 @@
 static int zynqmp_qspi_probe(struct platform_device *pdev)
 {
 	int ret = 0;
-	struct spi_master *master;
+	struct spi_controller *ctlr;
 	struct zynqmp_qspi *xqspi;
 	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
 
-	master = spi_alloc_master(&pdev->dev, sizeof(*xqspi));
-	if (!master)
+	ctlr = spi_alloc_master(&pdev->dev, sizeof(*xqspi));
+	if (!ctlr)
 		return -ENOMEM;
 
-	xqspi = spi_master_get_devdata(master);
-	master->dev.of_node = pdev->dev.of_node;
-	platform_set_drvdata(pdev, master);
+	xqspi = spi_controller_get_devdata(ctlr);
+	xqspi->dev = dev;
+	platform_set_drvdata(pdev, xqspi);
 
 	xqspi->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(xqspi->regs)) {
@@ -1028,7 +1128,6 @@
 		goto remove_master;
 	}
 
-	xqspi->dev = dev;
 	xqspi->pclk = devm_clk_get(&pdev->dev, "pclk");
 	if (IS_ERR(xqspi->pclk)) {
 		dev_err(dev, "pclk clock not found.\n");
@@ -1036,11 +1135,7 @@
 		goto remove_master;
 	}
 
-	ret = clk_prepare_enable(xqspi->pclk);
-	if (ret) {
-		dev_err(dev, "Unable to enable APB clock.\n");
-		goto remove_master;
-	}
+	init_completion(&xqspi->data_completion);
 
 	xqspi->refclk = devm_clk_get(&pdev->dev, "ref_clk");
 	if (IS_ERR(xqspi->refclk)) {
@@ -1049,6 +1144,12 @@
 		goto clk_dis_pclk;
 	}
 
+	ret = clk_prepare_enable(xqspi->pclk);
+	if (ret) {
+		dev_err(dev, "Unable to enable APB clock.\n");
+		goto remove_master;
+	}
+
 	ret = clk_prepare_enable(xqspi->refclk);
 	if (ret) {
 		dev_err(dev, "Unable to enable device clock.\n");
@@ -1070,32 +1171,28 @@
 		goto clk_dis_all;
 	}
 	ret = devm_request_irq(&pdev->dev, xqspi->irq, zynqmp_qspi_irq,
-			       0, pdev->name, master);
+			       0, pdev->name, xqspi);
 	if (ret != 0) {
 		ret = -ENXIO;
 		dev_err(dev, "request_irq failed\n");
 		goto clk_dis_all;
 	}
 
-	master->num_chipselect = GQSPI_DEFAULT_NUM_CS;
-
-	master->setup = zynqmp_qspi_setup;
-	master->set_cs = zynqmp_qspi_chipselect;
-	master->transfer_one = zynqmp_qspi_start_transfer;
-	master->prepare_transfer_hardware = zynqmp_prepare_transfer_hardware;
-	master->unprepare_transfer_hardware =
-					zynqmp_unprepare_transfer_hardware;
-	master->max_speed_hz = clk_get_rate(xqspi->refclk) / 2;
-	master->bits_per_word_mask = SPI_BPW_MASK(8);
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_RX_DUAL | SPI_RX_QUAD |
+	ctlr->bits_per_word_mask = SPI_BPW_MASK(8);
+	ctlr->num_chipselect = GQSPI_DEFAULT_NUM_CS;
+	ctlr->mem_ops = &zynqmp_qspi_mem_ops;
+	ctlr->setup = zynqmp_qspi_setup_op;
+	ctlr->max_speed_hz = clk_get_rate(xqspi->refclk) / 2;
+	ctlr->bits_per_word_mask = SPI_BPW_MASK(8);
+	ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_RX_DUAL | SPI_RX_QUAD |
 			    SPI_TX_DUAL | SPI_TX_QUAD;
+	ctlr->dev.of_node = np;
 
-	if (master->dev.parent == NULL)
-		master->dev.parent = &master->dev;
-
-	ret = spi_register_master(master);
-	if (ret)
+	ret = devm_spi_register_controller(&pdev->dev, ctlr);
+	if (ret) {
+		dev_err(&pdev->dev, "spi_register_controller failed\n");
 		goto clk_dis_all;
+	}
 
 	return 0;
 
@@ -1106,13 +1203,13 @@
 clk_dis_pclk:
 	clk_disable_unprepare(xqspi->pclk);
 remove_master:
-	spi_master_put(master);
+	spi_controller_put(ctlr);
 
 	return ret;
 }
 
 /**
- * zynqmp_qspi_remove:	Remove method for the QSPI driver
+ * zynqmp_qspi_remove - Remove method for the QSPI driver
  * @pdev:	Pointer to the platform_device structure
  *
  * This function is called if a device is physically removed from the system or
@@ -1123,8 +1220,7 @@
  */
 static int zynqmp_qspi_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = platform_get_drvdata(pdev);
-	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
+	struct zynqmp_qspi *xqspi = platform_get_drvdata(pdev);
 
 	zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, 0x0);
 	clk_disable_unprepare(xqspi->refclk);
@@ -1132,8 +1228,6 @@
 	pm_runtime_set_suspended(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
-	spi_unregister_master(master);
-
 	return 0;
 }
 
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index 455e99c..859910e 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -146,7 +146,7 @@
 spidev_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
 {
 	struct spidev_data	*spidev;
-	ssize_t			status = 0;
+	ssize_t			status;
 
 	/* chipselect only toggles at start or end of operation */
 	if (count > bufsiz)
@@ -176,7 +176,7 @@
 		size_t count, loff_t *f_pos)
 {
 	struct spidev_data	*spidev;
-	ssize_t			status = 0;
+	ssize_t			status;
 	unsigned long		missing;
 
 	/* chipselect only toggles at start or end of operation */
diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index 6facf27..7f73b26 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -1,7 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- *  pxa2xx_ssp.h
- *
  *  Copyright (C) 2003 Russell King, All Rights Reserved.
  *
  * This driver supports the following PXA CPU/SSP ports:-
@@ -16,10 +14,16 @@
 #ifndef __LINUX_SSP_H
 #define __LINUX_SSP_H
 
-#include <linux/list.h>
+#include <linux/bits.h>
+#include <linux/compiler_types.h>
 #include <linux/io.h>
-#include <linux/of.h>
+#include <linux/kconfig.h>
+#include <linux/list.h>
+#include <linux/types.h>
 
+struct clk;
+struct device;
+struct device_node;
 
 /*
  * SSP Serial Port Registers
@@ -43,130 +47,127 @@
 #define SSACDD		(0x40)	/* SSP Audio Clock Dither Divider */
 
 /* Common PXA2xx bits first */
-#define SSCR0_DSS	(0x0000000f)	/* Data Size Select (mask) */
+#define SSCR0_DSS	GENMASK(3, 0)	/* Data Size Select (mask) */
 #define SSCR0_DataSize(x)  ((x) - 1)	/* Data Size Select [4..16] */
-#define SSCR0_FRF	(0x00000030)	/* FRame Format (mask) */
+#define SSCR0_FRF	GENMASK(5, 4)	/* FRame Format (mask) */
 #define SSCR0_Motorola	(0x0 << 4)	/* Motorola's Serial Peripheral Interface (SPI) */
 #define SSCR0_TI	(0x1 << 4)	/* Texas Instruments' Synchronous Serial Protocol (SSP) */
 #define SSCR0_National	(0x2 << 4)	/* National Microwire */
-#define SSCR0_ECS	(1 << 6)	/* External clock select */
-#define SSCR0_SSE	(1 << 7)	/* Synchronous Serial Port Enable */
+#define SSCR0_ECS	BIT(6)		/* External clock select */
+#define SSCR0_SSE	BIT(7)		/* Synchronous Serial Port Enable */
 #define SSCR0_SCR(x)	((x) << 8)	/* Serial Clock Rate (mask) */
 
 /* PXA27x, PXA3xx */
-#define SSCR0_EDSS	(1 << 20)	/* Extended data size select */
-#define SSCR0_NCS	(1 << 21)	/* Network clock select */
-#define SSCR0_RIM	(1 << 22)	/* Receive FIFO overrrun interrupt mask */
-#define SSCR0_TUM	(1 << 23)	/* Transmit FIFO underrun interrupt mask */
-#define SSCR0_FRDC	(0x07000000)	/* Frame rate divider control (mask) */
+#define SSCR0_EDSS	BIT(20)		/* Extended data size select */
+#define SSCR0_NCS	BIT(21)		/* Network clock select */
+#define SSCR0_RIM	BIT(22)		/* Receive FIFO overrrun interrupt mask */
+#define SSCR0_TUM	BIT(23)		/* Transmit FIFO underrun interrupt mask */
+#define SSCR0_FRDC	GENMASK(26, 24)	/* Frame rate divider control (mask) */
 #define SSCR0_SlotsPerFrm(x) (((x) - 1) << 24)	/* Time slots per frame [1..8] */
-#define SSCR0_FPCKE	(1 << 29)	/* FIFO packing enable */
-#define SSCR0_ACS	(1 << 30)	/* Audio clock select */
-#define SSCR0_MOD	(1 << 31)	/* Mode (normal or network) */
+#define SSCR0_FPCKE	BIT(29)		/* FIFO packing enable */
+#define SSCR0_ACS	BIT(30)		/* Audio clock select */
+#define SSCR0_MOD	BIT(31)		/* Mode (normal or network) */
 
+#define SSCR1_RIE	BIT(0)		/* Receive FIFO Interrupt Enable */
+#define SSCR1_TIE	BIT(1)		/* Transmit FIFO Interrupt Enable */
+#define SSCR1_LBM	BIT(2)		/* Loop-Back Mode */
+#define SSCR1_SPO	BIT(3)		/* Motorola SPI SSPSCLK polarity setting */
+#define SSCR1_SPH	BIT(4)		/* Motorola SPI SSPSCLK phase setting */
+#define SSCR1_MWDS	BIT(5)		/* Microwire Transmit Data Size */
 
-#define SSCR1_RIE	(1 << 0)	/* Receive FIFO Interrupt Enable */
-#define SSCR1_TIE	(1 << 1)	/* Transmit FIFO Interrupt Enable */
-#define SSCR1_LBM	(1 << 2)	/* Loop-Back Mode */
-#define SSCR1_SPO	(1 << 3)	/* Motorola SPI SSPSCLK polarity setting */
-#define SSCR1_SPH	(1 << 4)	/* Motorola SPI SSPSCLK phase setting */
-#define SSCR1_MWDS	(1 << 5)	/* Microwire Transmit Data Size */
-
-#define SSSR_ALT_FRM_MASK	3	/* Masks the SFRM signal number */
-#define SSSR_TNF	(1 << 2)	/* Transmit FIFO Not Full */
-#define SSSR_RNE	(1 << 3)	/* Receive FIFO Not Empty */
-#define SSSR_BSY	(1 << 4)	/* SSP Busy */
-#define SSSR_TFS	(1 << 5)	/* Transmit FIFO Service Request */
-#define SSSR_RFS	(1 << 6)	/* Receive FIFO Service Request */
-#define SSSR_ROR	(1 << 7)	/* Receive FIFO Overrun */
+#define SSSR_ALT_FRM_MASK	GENMASK(1, 0)	/* Masks the SFRM signal number */
+#define SSSR_TNF		BIT(2)		/* Transmit FIFO Not Full */
+#define SSSR_RNE		BIT(3)		/* Receive FIFO Not Empty */
+#define SSSR_BSY		BIT(4)		/* SSP Busy */
+#define SSSR_TFS		BIT(5)		/* Transmit FIFO Service Request */
+#define SSSR_RFS		BIT(6)		/* Receive FIFO Service Request */
+#define SSSR_ROR		BIT(7)		/* Receive FIFO Overrun */
 
 #define RX_THRESH_DFLT	8
 #define TX_THRESH_DFLT	8
 
-#define SSSR_TFL_MASK	(0xf << 8)	/* Transmit FIFO Level mask */
-#define SSSR_RFL_MASK	(0xf << 12)	/* Receive FIFO Level mask */
+#define SSSR_TFL_MASK	GENMASK(11, 8)	/* Transmit FIFO Level mask */
+#define SSSR_RFL_MASK	GENMASK(15, 12)	/* Receive FIFO Level mask */
 
-#define SSCR1_TFT	(0x000003c0)	/* Transmit FIFO Threshold (mask) */
+#define SSCR1_TFT	GENMASK(9, 6)	/* Transmit FIFO Threshold (mask) */
 #define SSCR1_TxTresh(x) (((x) - 1) << 6)	/* level [1..16] */
-#define SSCR1_RFT	(0x00003c00)	/* Receive FIFO Threshold (mask) */
+#define SSCR1_RFT	GENMASK(13, 10)	/* Receive FIFO Threshold (mask) */
 #define SSCR1_RxTresh(x) (((x) - 1) << 10)	/* level [1..16] */
 
 #define RX_THRESH_CE4100_DFLT	2
 #define TX_THRESH_CE4100_DFLT	2
 
-#define CE4100_SSSR_TFL_MASK	(0x3 << 8)	/* Transmit FIFO Level mask */
-#define CE4100_SSSR_RFL_MASK	(0x3 << 12)	/* Receive FIFO Level mask */
+#define CE4100_SSSR_TFL_MASK	GENMASK(9, 8)	/* Transmit FIFO Level mask */
+#define CE4100_SSSR_RFL_MASK	GENMASK(13, 12)	/* Receive FIFO Level mask */
 
-#define CE4100_SSCR1_TFT	(0x000000c0)	/* Transmit FIFO Threshold (mask) */
+#define CE4100_SSCR1_TFT	GENMASK(7, 6)	/* Transmit FIFO Threshold (mask) */
 #define CE4100_SSCR1_TxTresh(x) (((x) - 1) << 6)	/* level [1..4] */
-#define CE4100_SSCR1_RFT	(0x00000c00)	/* Receive FIFO Threshold (mask) */
+#define CE4100_SSCR1_RFT	GENMASK(11, 10)	/* Receive FIFO Threshold (mask) */
 #define CE4100_SSCR1_RxTresh(x) (((x) - 1) << 10)	/* level [1..4] */
 
 /* QUARK_X1000 SSCR0 bit definition */
-#define QUARK_X1000_SSCR0_DSS		(0x1F << 0)	/* Data Size Select (mask) */
+#define QUARK_X1000_SSCR0_DSS		GENMASK(4, 0)	/* Data Size Select (mask) */
 #define QUARK_X1000_SSCR0_DataSize(x)	((x) - 1)	/* Data Size Select [4..32] */
-#define QUARK_X1000_SSCR0_FRF		(0x3 << 5)	/* FRame Format (mask) */
+#define QUARK_X1000_SSCR0_FRF		GENMASK(6, 5)	/* FRame Format (mask) */
 #define QUARK_X1000_SSCR0_Motorola	(0x0 << 5)	/* Motorola's Serial Peripheral Interface (SPI) */
 
 #define RX_THRESH_QUARK_X1000_DFLT	1
 #define TX_THRESH_QUARK_X1000_DFLT	16
 
-#define QUARK_X1000_SSSR_TFL_MASK	(0x1F << 8)	/* Transmit FIFO Level mask */
-#define QUARK_X1000_SSSR_RFL_MASK	(0x1F << 13)	/* Receive FIFO Level mask */
+#define QUARK_X1000_SSSR_TFL_MASK	GENMASK(12, 8)	/* Transmit FIFO Level mask */
+#define QUARK_X1000_SSSR_RFL_MASK	GENMASK(17, 13)	/* Receive FIFO Level mask */
 
-#define QUARK_X1000_SSCR1_TFT	(0x1F << 6)	/* Transmit FIFO Threshold (mask) */
+#define QUARK_X1000_SSCR1_TFT	GENMASK(10, 6)	/* Transmit FIFO Threshold (mask) */
 #define QUARK_X1000_SSCR1_TxTresh(x) (((x) - 1) << 6)	/* level [1..32] */
-#define QUARK_X1000_SSCR1_RFT	(0x1F << 11)	/* Receive FIFO Threshold (mask) */
+#define QUARK_X1000_SSCR1_RFT	GENMASK(15, 11)	/* Receive FIFO Threshold (mask) */
 #define QUARK_X1000_SSCR1_RxTresh(x) (((x) - 1) << 11)	/* level [1..32] */
-#define QUARK_X1000_SSCR1_STRF	(1 << 17)	/* Select FIFO or EFWR */
-#define QUARK_X1000_SSCR1_EFWR	(1 << 16)	/* Enable FIFO Write/Read */
+#define QUARK_X1000_SSCR1_EFWR	BIT(16)		/* Enable FIFO Write/Read */
+#define QUARK_X1000_SSCR1_STRF	BIT(17)		/* Select FIFO or EFWR */
 
 /* extra bits in PXA255, PXA26x and PXA27x SSP ports */
 #define SSCR0_TISSP		(1 << 4)	/* TI Sync Serial Protocol */
 #define SSCR0_PSP		(3 << 4)	/* PSP - Programmable Serial Protocol */
-#define SSCR1_TTELP		(1 << 31)	/* TXD Tristate Enable Last Phase */
-#define SSCR1_TTE		(1 << 30)	/* TXD Tristate Enable */
-#define SSCR1_EBCEI		(1 << 29)	/* Enable Bit Count Error interrupt */
-#define SSCR1_SCFR		(1 << 28)	/* Slave Clock free Running */
-#define SSCR1_ECRA		(1 << 27)	/* Enable Clock Request A */
-#define SSCR1_ECRB		(1 << 26)	/* Enable Clock request B */
-#define SSCR1_SCLKDIR		(1 << 25)	/* Serial Bit Rate Clock Direction */
-#define SSCR1_SFRMDIR		(1 << 24)	/* Frame Direction */
-#define SSCR1_RWOT		(1 << 23)	/* Receive Without Transmit */
-#define SSCR1_TRAIL		(1 << 22)	/* Trailing Byte */
-#define SSCR1_TSRE		(1 << 21)	/* Transmit Service Request Enable */
-#define SSCR1_RSRE		(1 << 20)	/* Receive Service Request Enable */
-#define SSCR1_TINTE		(1 << 19)	/* Receiver Time-out Interrupt enable */
-#define SSCR1_PINTE		(1 << 18)	/* Peripheral Trailing Byte Interrupt Enable */
-#define SSCR1_IFS		(1 << 16)	/* Invert Frame Signal */
-#define SSCR1_STRF		(1 << 15)	/* Select FIFO or EFWR */
-#define SSCR1_EFWR		(1 << 14)	/* Enable FIFO Write/Read */
 
-#define SSSR_BCE		(1 << 23)	/* Bit Count Error */
-#define SSSR_CSS		(1 << 22)	/* Clock Synchronisation Status */
-#define SSSR_TUR		(1 << 21)	/* Transmit FIFO Under Run */
-#define SSSR_EOC		(1 << 20)	/* End Of Chain */
-#define SSSR_TINT		(1 << 19)	/* Receiver Time-out Interrupt */
-#define SSSR_PINT		(1 << 18)	/* Peripheral Trailing Byte Interrupt */
+#define SSCR1_EFWR		BIT(14)		/* Enable FIFO Write/Read */
+#define SSCR1_STRF		BIT(15)		/* Select FIFO or EFWR */
+#define SSCR1_IFS		BIT(16)		/* Invert Frame Signal */
+#define SSCR1_PINTE		BIT(18)		/* Peripheral Trailing Byte Interrupt Enable */
+#define SSCR1_TINTE		BIT(19)		/* Receiver Time-out Interrupt enable */
+#define SSCR1_RSRE		BIT(20)		/* Receive Service Request Enable */
+#define SSCR1_TSRE		BIT(21)		/* Transmit Service Request Enable */
+#define SSCR1_TRAIL		BIT(22)		/* Trailing Byte */
+#define SSCR1_RWOT		BIT(23)		/* Receive Without Transmit */
+#define SSCR1_SFRMDIR		BIT(24)		/* Frame Direction */
+#define SSCR1_SCLKDIR		BIT(25)		/* Serial Bit Rate Clock Direction */
+#define SSCR1_ECRB		BIT(26)		/* Enable Clock request B */
+#define SSCR1_ECRA		BIT(27)		/* Enable Clock Request A */
+#define SSCR1_SCFR		BIT(28)		/* Slave Clock free Running */
+#define SSCR1_EBCEI		BIT(29)		/* Enable Bit Count Error interrupt */
+#define SSCR1_TTE		BIT(30)		/* TXD Tristate Enable */
+#define SSCR1_TTELP		BIT(31)		/* TXD Tristate Enable Last Phase */
 
+#define SSSR_PINT		BIT(18)		/* Peripheral Trailing Byte Interrupt */
+#define SSSR_TINT		BIT(19)		/* Receiver Time-out Interrupt */
+#define SSSR_EOC		BIT(20)		/* End Of Chain */
+#define SSSR_TUR		BIT(21)		/* Transmit FIFO Under Run */
+#define SSSR_CSS		BIT(22)		/* Clock Synchronisation Status */
+#define SSSR_BCE		BIT(23)		/* Bit Count Error */
 
 #define SSPSP_SCMODE(x)		((x) << 0)	/* Serial Bit Rate Clock Mode */
-#define SSPSP_SFRMP		(1 << 2)	/* Serial Frame Polarity */
-#define SSPSP_ETDS		(1 << 3)	/* End of Transfer data State */
+#define SSPSP_SFRMP		BIT(2)		/* Serial Frame Polarity */
+#define SSPSP_ETDS		BIT(3)		/* End of Transfer data State */
 #define SSPSP_STRTDLY(x)	((x) << 4)	/* Start Delay */
 #define SSPSP_DMYSTRT(x)	((x) << 7)	/* Dummy Start */
 #define SSPSP_SFRMDLY(x)	((x) << 9)	/* Serial Frame Delay */
 #define SSPSP_SFRMWDTH(x)	((x) << 16)	/* Serial Frame Width */
 #define SSPSP_DMYSTOP(x)	((x) << 23)	/* Dummy Stop */
-#define SSPSP_FSRT		(1 << 25)	/* Frame Sync Relative Timing */
+#define SSPSP_FSRT		BIT(25)		/* Frame Sync Relative Timing */
 
 /* PXA3xx */
 #define SSPSP_EDMYSTRT(x)	((x) << 26)     /* Extended Dummy Start */
 #define SSPSP_EDMYSTOP(x)	((x) << 28)     /* Extended Dummy Stop */
 #define SSPSP_TIMING_MASK	(0x7f8001f0)
 
-#define SSACD_SCDB		(1 << 3)	/* SSPSYSCLK Divider Bypass */
-#define SSACD_ACPS(x)		((x) << 4)	/* Audio clock PLL select */
 #define SSACD_ACDS(x)		((x) << 0)	/* Audio clock divider select */
 #define SSACD_ACDS_1		(0)
 #define SSACD_ACDS_2		(1)
@@ -174,18 +175,24 @@
 #define SSACD_ACDS_8		(3)
 #define SSACD_ACDS_16		(4)
 #define SSACD_ACDS_32		(5)
+#define SSACD_SCDB		BIT(3)		/* SSPSYSCLK Divider Bypass */
 #define SSACD_SCDB_4X		(0)
 #define SSACD_SCDB_1X		(1)
-#define SSACD_SCDX8		(1 << 7)	/* SYSCLK division ratio select */
+#define SSACD_ACPS(x)		((x) << 4)	/* Audio clock PLL select */
+#define SSACD_SCDX8		BIT(7)		/* SYSCLK division ratio select */
 
 /* LPSS SSP */
 #define SSITF			0x44		/* TX FIFO trigger level */
+#define SSITF_TxHiThresh(x)	(((x) - 1) << 0)
 #define SSITF_TxLoThresh(x)	(((x) - 1) << 8)
-#define SSITF_TxHiThresh(x)	((x) - 1)
 
 #define SSIRF			0x48		/* RX FIFO trigger level */
 #define SSIRF_RxThresh(x)	((x) - 1)
 
+/* LPT/WPT SSP */
+#define SSCR2		(0x40)	/* SSP Command / Status 2 */
+#define SSPSP2		(0x44)	/* SSP Programmable Serial Protocol 2 */
+
 enum pxa_ssp_type {
 	SSP_UNDEFINED = 0,
 	PXA25x_SSP,  /* pxa 210, 250, 255, 26x */