Merge tag 'kvm-x86-vmx-6.4' of https://github.com/kvm-x86/linux into HEAD
KVM VMX changes for 6.4:
- Fix a bug in emulation of ENCLS in compatibility mode
- Allow emulation of NOP and PAUSE for L2
- Misc cleanups
diff --git a/.gitignore b/.gitignore
index 8fe465f..70ec603 100644
--- a/.gitignore
+++ b/.gitignore
@@ -78,6 +78,7 @@
# RPM spec file (make rpm-pkg)
#
/*.spec
+/rpmbuild/
#
# Debian directory (make deb-pkg)
diff --git a/.mailmap b/.mailmap
index 424564f..e2af78f 100644
--- a/.mailmap
+++ b/.mailmap
@@ -28,6 +28,7 @@
Alexander Mikhalitsyn <alexander@mihalicyn.com> <alexander.mikhalitsyn@virtuozzo.com>
Alexander Mikhalitsyn <alexander@mihalicyn.com> <aleksandr.mikhalitsyn@canonical.com>
Alexandre Belloni <alexandre.belloni@bootlin.com> <alexandre.belloni@free-electrons.com>
+Alexandre Ghiti <alex@ghiti.fr> <alexandre.ghiti@canonical.com>
Alexei Starovoitov <ast@kernel.org> <alexei.starovoitov@gmail.com>
Alexei Starovoitov <ast@kernel.org> <ast@fb.com>
Alexei Starovoitov <ast@kernel.org> <ast@plumgrid.com>
@@ -121,7 +122,7 @@
Dengcheng Zhu <dzhu@wavecomp.com> <dengcheng.zhu@imgtec.com>
Dengcheng Zhu <dzhu@wavecomp.com> <dengcheng.zhu@mips.com>
<dev.kurt@vandijck-laurijssen.be> <kurt.van.dijck@eia.be>
-Dikshita Agarwal <dikshita@qti.qualcomm.com> <dikshita@codeaurora.org>
+Dikshita Agarwal <quic_dikshita@quicinc.com> <dikshita@codeaurora.org>
Dmitry Baryshkov <dbaryshkov@gmail.com>
Dmitry Baryshkov <dbaryshkov@gmail.com> <[dbaryshkov@gmail.com]>
Dmitry Baryshkov <dbaryshkov@gmail.com> <dmitry_baryshkov@mentor.com>
@@ -132,6 +133,8 @@
Domen Puncer <domen@coderock.org>
Douglas Gilbert <dougg@torque.net>
Ed L. Cashin <ecashin@coraid.com>
+Enric Balletbo i Serra <eballetbo@kernel.org> <enric.balletbo@collabora.com>
+Enric Balletbo i Serra <eballetbo@kernel.org> <eballetbo@iseebcn.com>
Erik Kaneda <erik.kaneda@intel.com> <erik.schmauss@intel.com>
Eugen Hristev <eugen.hristev@collabora.com> <eugen.hristev@microchip.com>
Evgeniy Polyakov <johnpol@2ka.mipt.ru>
@@ -194,6 +197,7 @@
Jan Glauber <jan.glauber@gmail.com> <jglauber@cavium.com>
Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@linux.intel.com>
Jarkko Sakkinen <jarkko@kernel.org> <jarkko@profian.com>
+Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@tuni.fi>
Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>
@@ -213,6 +217,9 @@
Jens Osterkamp <Jens.Osterkamp@de.ibm.com>
Jernej Skrabec <jernej.skrabec@gmail.com> <jernej.skrabec@siol.net>
Jessica Zhang <quic_jesszhan@quicinc.com> <jesszhan@codeaurora.org>
+Jiri Pirko <jiri@resnulli.us> <jiri@nvidia.com>
+Jiri Pirko <jiri@resnulli.us> <jiri@mellanox.com>
+Jiri Pirko <jiri@resnulli.us> <jpirko@redhat.com>
Jiri Slaby <jirislaby@kernel.org> <jirislaby@gmail.com>
Jiri Slaby <jirislaby@kernel.org> <jslaby@novell.com>
Jiri Slaby <jirislaby@kernel.org> <jslaby@suse.com>
@@ -374,6 +381,7 @@
Quentin Perret <qperret@qperret.net> <quentin.perret@arm.com>
Rafael J. Wysocki <rjw@rjwysocki.net> <rjw@sisk.pl>
Rajeev Nandan <quic_rajeevny@quicinc.com> <rajeevny@codeaurora.org>
+Rajendra Nayak <quic_rjendra@quicinc.com> <rnayak@codeaurora.org>
Rajesh Shah <rajesh.shah@intel.com>
Ralf Baechle <ralf@linux-mips.org>
Ralf Wildenhues <Ralf.Wildenhues@gmx.de>
@@ -382,6 +390,9 @@
Ricardo Ribalda <ribalda@kernel.org> <ricardo@ribalda.com>
Ricardo Ribalda <ribalda@kernel.org> Ricardo Ribalda Delgado <ribalda@kernel.org>
Ricardo Ribalda <ribalda@kernel.org> <ricardo.ribalda@gmail.com>
+Richard Leitner <richard.leitner@linux.dev> <dev@g0hl1n.net>
+Richard Leitner <richard.leitner@linux.dev> <me@g0hl1n.net>
+Richard Leitner <richard.leitner@linux.dev> <richard.leitner@skidata.com>
Robert Foss <rfoss@kernel.org> <robert.foss@linaro.org>
Roman Gushchin <roman.gushchin@linux.dev> <guro@fb.com>
Roman Gushchin <roman.gushchin@linux.dev> <guroan@gmail.com>
@@ -392,6 +403,7 @@
Rudolf Marek <R.Marek@sh.cvut.cz>
Rui Saraiva <rmps@joel.ist.utl.pt>
Sachin P Sant <ssant@in.ibm.com>
+Sai Prakash Ranjan <quic_saipraka@quicinc.com> <saiprakash.ranjan@codeaurora.org>
Sakari Ailus <sakari.ailus@linux.intel.com> <sakari.ailus@iki.fi>
Sam Ravnborg <sam@mars.ravnborg.org>
Sankeerth Billakanti <quic_sbillaka@quicinc.com> <sbillaka@codeaurora.org>
@@ -432,6 +444,10 @@
Thomas Körper <socketcan@esd.eu> <thomas.koerper@esd.eu>
Thomas Pedersen <twp@codeaurora.org>
Tiezhu Yang <yangtiezhu@loongson.cn> <kernelpatch@126.com>
+Tobias Klauser <tklauser@distanz.ch> <tobias.klauser@gmail.com>
+Tobias Klauser <tklauser@distanz.ch> <klto@zhaw.ch>
+Tobias Klauser <tklauser@distanz.ch> <tklauser@nuerscht.ch>
+Tobias Klauser <tklauser@distanz.ch> <tklauser@xenon.tklauser.home>
Todor Tomov <todor.too@gmail.com> <todor.tomov@linaro.org>
Tony Luck <tony.luck@intel.com>
TripleX Chung <xxx.phy@gmail.com> <triplex@zh-kernel.org>
diff --git a/Documentation/driver-api/vfio.rst b/Documentation/driver-api/vfio.rst
index 50b690f..68abc08 100644
--- a/Documentation/driver-api/vfio.rst
+++ b/Documentation/driver-api/vfio.rst
@@ -242,7 +242,7 @@
VFIO User API
-------------------------------------------------------------------------------
-Please see include/linux/vfio.h for complete API documentation.
+Please see include/uapi/linux/vfio.h for complete API documentation.
VFIO bus driver API
-------------------------------------------------------------------------------
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index c53f302..f3b344f 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -1222,7 +1222,7 @@
return
-ECHILD and it will be called again in ref-walk mode.
-``_weak_revalidate``
+``d_weak_revalidate``
called when the VFS needs to revalidate a "jumped" dentry. This
is called when a path-walk ends at dentry that was not acquired
by doing a lookup in the parent directory. This includes "/",
diff --git a/Documentation/firmware-guide/acpi/enumeration.rst b/Documentation/firmware-guide/acpi/enumeration.rst
index b9dc0c6..56d9913 100644
--- a/Documentation/firmware-guide/acpi/enumeration.rst
+++ b/Documentation/firmware-guide/acpi/enumeration.rst
@@ -19,7 +19,7 @@
platform devices.
- Devices behind real busses where there is a connector resource
- are represented as struct spi_device or struct i2c_device. Note
+ are represented as struct spi_device or struct i2c_client. Note
that standard UARTs are not busses so there is no struct uart_device,
although some of them may be represented by struct serdev_device.
diff --git a/Documentation/maintainer/rebasing-and-merging.rst b/Documentation/maintainer/rebasing-and-merging.rst
index 09f988e..85800ce 100644
--- a/Documentation/maintainer/rebasing-and-merging.rst
+++ b/Documentation/maintainer/rebasing-and-merging.rst
@@ -213,11 +213,7 @@
emptied entirely into the mainline during the merge window, you can pull it
forward with a command like::
- git merge v5.2-rc1^0
-
-The "^0" will cause Git to do a fast-forward merge (which should be
-possible in this situation), thus avoiding the addition of a spurious merge
-commit.
+ git merge --ff-only v5.2-rc1
The guidelines laid out above are just that: guidelines. There will always
be situations that call out for a different solution, and these guidelines
diff --git a/Documentation/mm/hugetlbfs_reserv.rst b/Documentation/mm/hugetlbfs_reserv.rst
index 3d05d64..d9c2b0f 100644
--- a/Documentation/mm/hugetlbfs_reserv.rst
+++ b/Documentation/mm/hugetlbfs_reserv.rst
@@ -5,10 +5,10 @@
Overview
========
-Huge pages as described at Documentation/mm/hugetlbpage.rst are typically
-preallocated for application use. These huge pages are instantiated in a
-task's address space at page fault time if the VMA indicates huge pages are
-to be used. If no huge page exists at page fault time, the task is sent
+Huge pages as described at Documentation/admin-guide/mm/hugetlbpage.rst are
+typically preallocated for application use. These huge pages are instantiated
+in a task's address space at page fault time if the VMA indicates huge pages
+are to be used. If no huge page exists at page fault time, the task is sent
a SIGBUS and often dies an unhappy death. Shortly after huge page support
was added, it was determined that it would be better to detect a shortage
of huge pages at mmap() time. The idea is that if there were not enough
diff --git a/Documentation/mm/physical_memory.rst b/Documentation/mm/physical_memory.rst
index f9d7ea4..1bc888d 100644
--- a/Documentation/mm/physical_memory.rst
+++ b/Documentation/mm/physical_memory.rst
@@ -66,7 +66,7 @@
also populated on boot using one of ``kernelcore``, ``movablecore`` and
``movable_node`` kernel command line parameters. See
Documentation/mm/page_migration.rst and
- Documentation/admin-guide/mm/memory_hotplug.rst for additional details.
+ Documentation/admin-guide/mm/memory-hotplug.rst for additional details.
* ``ZONE_DEVICE`` represents memory residing on devices such as PMEM and GPU.
It has different characteristics than RAM zone types and it exists to provide
diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml
index f082a5a..5c3642b 100644
--- a/Documentation/netlink/genetlink-c.yaml
+++ b/Documentation/netlink/genetlink-c.yaml
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
%YAML 1.2
---
$id: http://kernel.org/schemas/netlink/genetlink-c.yaml#
diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml
index c6b8c77f..5e98c6d 100644
--- a/Documentation/netlink/genetlink-legacy.yaml
+++ b/Documentation/netlink/genetlink-legacy.yaml
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
%YAML 1.2
---
$id: http://kernel.org/schemas/netlink/genetlink-legacy.yaml#
diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml
index b2d56ab..d35dcd6 100644
--- a/Documentation/netlink/genetlink.yaml
+++ b/Documentation/netlink/genetlink.yaml
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
%YAML 1.2
---
$id: http://kernel.org/schemas/netlink/genetlink-legacy.yaml#
diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index 18ecb7d..4727c067 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
name: ethtool
diff --git a/Documentation/netlink/specs/fou.yaml b/Documentation/netlink/specs/fou.yaml
index cff1042..3e13826 100644
--- a/Documentation/netlink/specs/fou.yaml
+++ b/Documentation/netlink/specs/fou.yaml
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
name: fou
diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index 24de747..b99e7ff 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
name: netdev
@@ -9,6 +9,7 @@
-
type: flags
name: xdp-act
+ render-max: true
entries:
-
name: basic
diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst
index aac63fc..25ce72a 100644
--- a/Documentation/networking/xdp-rx-metadata.rst
+++ b/Documentation/networking/xdp-rx-metadata.rst
@@ -23,10 +23,13 @@
An XDP program can use these kfuncs to read the metadata into stack
variables for its own consumption. Or, to pass the metadata on to other
consumers, an XDP program can store it into the metadata area carried
-ahead of the packet.
+ahead of the packet. Not all packets will necessarily have the requested
+metadata available, in which case the driver returns ``-ENODATA``.
Not all kfuncs have to be implemented by the device driver; when not
-implemented, the default ones that return ``-EOPNOTSUPP`` will be used.
+implemented, the default ones that return ``-EOPNOTSUPP`` will be used
+to indicate that the device driver has not implemented this kfunc.
+
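+As a purely illustrative sketch (``ctx`` is the program's ``struct xdp_md``
+pointer and ``bpf_xdp_metadata_rx_timestamp`` is one of the kfuncs described
+in this document), an XDP program might probe for an RX timestamp roughly as
+follows::
+
+  __u64 ts;
+  int err = bpf_xdp_metadata_rx_timestamp(ctx, &ts);
+
+  if (err == -ENODATA) {
+    /* the driver has no timestamp for this particular packet */
+  } else if (err == -EOPNOTSUPP) {
+    /* the driver does not implement this kfunc; the default stub was used */
+  }
+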
Within an XDP frame, the metadata layout (accessed via ``xdp_buff``) is
as follows::
diff --git a/Documentation/process/programming-language.rst b/Documentation/process/programming-language.rst
index 5fc9160..bc56dee 100644
--- a/Documentation/process/programming-language.rst
+++ b/Documentation/process/programming-language.rst
@@ -12,10 +12,6 @@
This dialect contains many extensions to the language [gnu-extensions]_,
and many of them are used within the kernel as a matter of course.
-There is some support for compiling the kernel with ``icc`` [icc]_ for several
-of the architectures, although at the time of writing it is not completed,
-requiring third-party patches.
-
Attributes
----------
@@ -35,12 +31,28 @@
Please refer to ``include/linux/compiler_attributes.h`` for more information.
+Rust
+----
+
+The kernel has experimental support for the Rust programming language
+[rust-language]_ under ``CONFIG_RUST``. It is compiled with ``rustc`` [rustc]_
+under ``--edition=2021`` [rust-editions]_. Editions are a way to introduce
+small changes to the language that are not backwards compatible.
+
+On top of that, some unstable features [rust-unstable-features]_ are used in
+the kernel. Unstable features may change in the future, thus it is an important
+goal to reach a point where only stable features are used.
+
+Please refer to Documentation/rust/index.rst for more information.
+
.. [c-language] http://www.open-std.org/jtc1/sc22/wg14/www/standards
.. [gcc] https://gcc.gnu.org
.. [clang] https://clang.llvm.org
-.. [icc] https://software.intel.com/en-us/c-compilers
.. [gcc-c-dialect-options] https://gcc.gnu.org/onlinedocs/gcc/C-Dialect-Options.html
.. [gnu-extensions] https://gcc.gnu.org/onlinedocs/gcc/C-Extensions.html
.. [gcc-attribute-syntax] https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html
.. [n2049] http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2049.pdf
-
+.. [rust-language] https://www.rust-lang.org
+.. [rustc] https://doc.rust-lang.org/rustc/
+.. [rust-editions] https://doc.rust-lang.org/edition-guide/editions/
+.. [rust-unstable-features] https://github.com/Rust-for-Linux/linux/issues/2
diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst
index eac7167..69ce64e 100644
--- a/Documentation/process/submitting-patches.rst
+++ b/Documentation/process/submitting-patches.rst
@@ -320,7 +320,7 @@
reviewers sometimes get grumpy. Even in that case, though, respond
politely and address the problems they have pointed out. When sending a next
version, add a ``patch changelog`` to the cover letter or to individual patches
-explaining difference aganst previous submission (see
+explaining difference against previous submission (see
:ref:`the_canonical_patch_format`).
See Documentation/process/email-clients.rst for recommendations on email
diff --git a/Documentation/scheduler/sched-capacity.rst b/Documentation/scheduler/sched-capacity.rst
index 8e2b853..e2c1cf7 100644
--- a/Documentation/scheduler/sched-capacity.rst
+++ b/Documentation/scheduler/sched-capacity.rst
@@ -258,7 +258,7 @@
needs to be handed to it. Architectures must define arch_scale_cpu_capacity()
for that purpose.
-The arm and arm64 architectures directly map this to the arch_topology driver
+The arm, arm64, and RISC-V architectures directly map this to the arch_topology driver
CPU scaling data, which is derived from the capacity-dmips-mhz CPU binding; see
Documentation/devicetree/bindings/cpu/cpu-capacity.txt.
diff --git a/Documentation/translations/zh_CN/mm/hugetlbfs_reserv.rst b/Documentation/translations/zh_CN/mm/hugetlbfs_reserv.rst
index c1fa353..b7a05442 100644
--- a/Documentation/translations/zh_CN/mm/hugetlbfs_reserv.rst
+++ b/Documentation/translations/zh_CN/mm/hugetlbfs_reserv.rst
@@ -15,7 +15,8 @@
概述
====
-Documentation/mm/hugetlbpage.rst 中描述的巨页通常是预先分配给应用程序使用的。如果VMA指
+Documentation/admin-guide/mm/hugetlbpage.rst
+中描述的巨页通常是预先分配给应用程序使用的。如果VMA指
示要使用巨页,这些巨页会在缺页异常时被实例化到任务的地址空间。如果在缺页异常
时没有巨页存在,任务就会被发送一个SIGBUS,并经常不高兴地死去。在加入巨页支
持后不久,人们决定,在mmap()时检测巨页的短缺情况会更好。这个想法是,如果
diff --git a/Documentation/translations/zh_CN/scheduler/sched-capacity.rst b/Documentation/translations/zh_CN/scheduler/sched-capacity.rst
index e07ffdd..8cba135 100644
--- a/Documentation/translations/zh_CN/scheduler/sched-capacity.rst
+++ b/Documentation/translations/zh_CN/scheduler/sched-capacity.rst
@@ -231,7 +231,7 @@
当前,Linux无法凭自身算出CPU算力,因此必须要有把这个信息传递给Linux的方式。每个架构必须为此
定义arch_scale_cpu_capacity()函数。
-arm和arm64架构直接把这个信息映射到arch_topology驱动的CPU scaling数据中(译注:参考
+arm、arm64和RISC-V架构直接把这个信息映射到arch_topology驱动的CPU scaling数据中(译注:参考
arch_topology.h的percpu变量cpu_scale),它是从capacity-dmips-mhz CPU binding中衍生计算
出来的。参见Documentation/devicetree/bindings/cpu/cpu-capacity.txt。
diff --git a/Documentation/usb/gadget_uvc.rst b/Documentation/usb/gadget_uvc.rst
new file mode 100644
index 0000000..6d22fac
--- /dev/null
+++ b/Documentation/usb/gadget_uvc.rst
@@ -0,0 +1,352 @@
+=======================
+Linux UVC Gadget Driver
+=======================
+
+Overview
+--------
+The UVC Gadget driver is a driver for hardware on the *device* side of a USB
+connection. It is intended to run on a Linux system that has USB device-side
+hardware such as boards with an OTG port.
+
+On the device system, once the driver is bound it appears as a V4L2 device with
+the output capability.
+
+On the host side (once connected via USB cable), a device running the UVC Gadget
+driver *and controlled by an appropriate userspace program* should appear as a UVC
+specification compliant camera, and function appropriately with any program
+designed to handle them. The userspace program running on the device system can
+queue image buffers from a variety of sources to be transmitted via the USB
+connection. Typically this would mean forwarding the buffers from a camera sensor
+peripheral, but the source of the buffer is entirely dependent on the userspace
+companion program.
+
+Configuring the device kernel
+-----------------------------
+The Kconfig options USB_CONFIGFS, USB_LIBCOMPOSITE, USB_CONFIGFS_F_UVC and
+USB_F_UVC must be selected to enable support for the UVC gadget.
+
+Configuring the gadget through configfs
+---------------------------------------
+The UVC Gadget expects to be configured through configfs using the UVC function.
+This allows a significant degree of flexibility, as many of a UVC device's
+settings can be controlled this way.
+
+Not all of the available attributes are described here. For a complete enumeration
+see Documentation/ABI/testing/configfs-usb-gadget-uvc
+
+Assumptions
+~~~~~~~~~~~
+This section assumes that you have mounted configfs at `/sys/kernel/config` and
+created a gadget as `/sys/kernel/config/usb_gadget/g1`.
+
+The UVC Function
+~~~~~~~~~~~~~~~~
+
+The first step is to create the UVC function:
+
+.. code-block:: bash
+
+ # These variables will be assumed throughout the rest of the document
+ CONFIGFS="/sys/kernel/config"
+ GADGET="$CONFIGFS/usb_gadget/g1"
+ FUNCTION="$GADGET/functions/uvc.0"
+
+ mkdir -p $FUNCTION
+
+Formats and Frames
+~~~~~~~~~~~~~~~~~~
+
+You must configure the gadget by telling it which formats you support, as well
+as the frame sizes and frame intervals that are supported for each format. In
+the current implementation there is no way for the gadget to refuse to set a
+format that the host instructs it to set, so it is important that this step is
+completed *accurately* to ensure that the host never asks for a format that
+can't be provided.
+
+Formats are created under the streaming/uncompressed and streaming/mjpeg configfs
+groups, with the framesizes created under the formats in the following
+structure:
+
+::
+
+ uvc.0 +
+ |
+ + streaming +
+ |
+ + mjpeg +
+ | |
+ | + mjpeg +
+ | |
+ | + 720p
+ | |
+ | + 1080p
+ |
+ + uncompressed +
+ |
+ + yuyv +
+ |
+ + 720p
+ |
+ + 1080p
+
+Each frame can then be configured with a width and height, plus the maximum
+buffer size required to store a single frame, and finally with the supported
+frame intervals for that format and framesize. Width and height are enumerated in
+units of pixels, frame interval in units of 100ns. To create the structure
+above with 2, 15 and 100 fps frameintervals for each framesize for example you
+might do:
+
+.. code-block:: bash
+
+ create_frame() {
+ # Example usage:
+ # create_frame <width> <height> <group> <format name>
+
+ WIDTH=$1
+ HEIGHT=$2
+ FORMAT=$3
+ NAME=$4
+
+ wdir=$FUNCTION/streaming/$FORMAT/$NAME/${HEIGHT}p
+
+ mkdir -p $wdir
+ echo $WIDTH > $wdir/wWidth
+ echo $HEIGHT > $wdir/wHeight
+ echo $(( $WIDTH * $HEIGHT * 2 )) > $wdir/dwMaxVideoFrameBufferSize
+ cat <<EOF > $wdir/dwFrameInterval
+ 666666
+ 100000
+ 5000000
+ EOF
+ }
+
+ create_frame 1280 720 mjpeg mjpeg
+ create_frame 1920 1080 mjpeg mjpeg
+ create_frame 1280 720 uncompressed yuyv
+ create_frame 1920 1080 uncompressed yuyv
+
+The only uncompressed format currently supported is YUYV, which is detailed at
+Documentation/userspace-api/media/v4l/pixfmt-packed.yuv.rst.
+
+Color Matching Descriptors
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+It's possible to specify some colorimetry information for each format you create.
+This step is optional, and default information will be included if this step is
+skipped; those default values follow those defined in the Color Matching Descriptor
+section of the UVC specification.
+
+To create a Color Matching Descriptor, create a configfs item and set its three
+attributes to your desired settings and then link to it from the format you wish
+it to be associated with:
+
+.. code-block:: bash
+
+ # Create a new Color Matching Descriptor
+
+ mkdir $FUNCTION/streaming/color_matching/yuyv
+ pushd $FUNCTION/streaming/color_matching/yuyv
+
+ echo 1 > bColorPrimaries
+ echo 1 > bTransferCharacteristics
+ echo 4 > bMatrixCoefficients
+
+ popd
+
+ # Create a symlink to the Color Matching Descriptor from the format's config item
+ ln -s $FUNCTION/streaming/color_matching/yuyv $FUNCTION/streaming/uncompressed/yuyv
+
+For details about the valid values, consult the UVC specification. Note that a
+default color matching descriptor exists and is used by any format which does
+not have a link to a different Color Matching Descriptor. It's possible to
+change the attribute settings for the default descriptor, so bear in mind that if
+you do that you are altering the defaults for any format that does not link to
+a different one.
+
+
+Header linking
+~~~~~~~~~~~~~~
+
+The UVC specification requires that Format and Frame descriptors be preceded by
+Headers detailing things such as the number and cumulative size of the different
+Format descriptors that follow. This and similar operations are achieved in
+configfs by linking between the configfs item representing the header and the
+config items representing those other descriptors, in this manner:
+
+.. code-block:: bash
+
+ mkdir $FUNCTION/streaming/header/h
+
+ # This section links the format descriptors and their associated frames
+ # to the header
+ cd $FUNCTION/streaming/header/h
+ ln -s ../../uncompressed/yuyv
+ ln -s ../../mjpeg/mjpeg
+
+ # This section ensures that the header will be transmitted for each
+ # speed's set of descriptors. If support for a particular speed is not
+ # needed then it can be skipped here.
+ cd ../../class/fs
+ ln -s ../../header/h
+ cd ../../class/hs
+ ln -s ../../header/h
+ cd ../../class/ss
+ ln -s ../../header/h
+ cd ../../../control
+ mkdir header/h
+ ln -s header/h class/fs
+ ln -s header/h class/ss
+
+
+Extension Unit Support
+~~~~~~~~~~~~~~~~~~~~~~
+
+A UVC Extension Unit (XU) basically provides a distinct unit to which control set
+and get requests can be addressed. The meaning of those control requests is
+entirely implementation dependent, but may be used to control settings outside
+of the UVC specification (for example enabling or disabling video effects). An
+XU can be inserted into the UVC unit chain or left free-hanging.
+
+Configuring an extension unit involves creating an entry in the appropriate
+directory and setting its attributes appropriately, like so:
+
+.. code-block:: bash
+
+ mkdir $FUNCTION/control/extensions/xu.0
+ pushd $FUNCTION/control/extensions/xu.0
+
+ # Set the bUnitID of the Processing Unit as the source for this
+ # Extension Unit
+ echo 2 > baSourceID
+
+ # Set this XU as the source of the default output terminal. This inserts
+ # the XU into the UVC chain between the PU and OT such that the final
+ # chain is IT > PU > XU.0 > OT
+ cat bUnitID > ../../terminal/output/default/baSourceID
+
+ # Flag some controls as being available for use. The bmControl field is
+ # a bitmap with each bit denoting the availability of a particular
+ # control. For example to flag the 0th, 2nd and 3rd controls available:
+ echo 0x0d > bmControls
+
+ # Set the GUID; this is a vendor-specific code identifying the XU.
+ echo -e -n "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10" > guidExtensionCode
+
+ popd
+
+The bmControls attribute and the baSourceID attribute are multi-value attributes.
+This means that you may write multiple newline separated values to them. For
+example to flag the 1st, 2nd, 9th and 10th controls as being available you would
+need to write two values to bmControls, like so:
+
+.. code-block:: bash
+
+ cat << EOF > bmControls
+ 0x03
+ 0x03
+ EOF
+
+The multi-value nature of the baSourceID attribute reflects the fact that XUs can
+be multiple-input, though note that this currently has no significant effect.
+
+The bControlSize attribute reflects the size of the bmControls attribute, and
+similarly bNrInPins reflects the size of the baSourceID attribute. Both
+attributes are automatically increased / decreased as you set bmControls and
+baSourceID. It is also possible to manually increase or decrease bControlSize
+which has the effect of truncating entries to the new size, or padding entries
+out with 0x00, for example:
+
+::
+
+ $ cat bmControls
+ 0x03
+ 0x05
+
+ $ cat bControlSize
+ 2
+
+ $ echo 1 > bControlSize
+ $ cat bmControls
+ 0x03
+
+ $ echo 2 > bControlSize
+ $ cat bmControls
+ 0x03
+ 0x00
+
+bNrInPins and baSourceID function in the same way.
+
+Custom Strings Support
+~~~~~~~~~~~~~~~~~~~~~~
+
+String descriptors that provide a textual description for various parts of a
+USB device can be defined in the usual place within USB configfs, and may then
+be linked to from the UVC function root or from Extension Unit directories to
+assign those strings as descriptors:
+
+.. code-block:: bash
+
+ # Create a string descriptor in en-US and link to it from the function
+ # root. The name of the link is significant here, as it declares this
+ # descriptor to be intended for the Interface Association Descriptor.
+ # Other significant link names at function root are vs0_desc and vs1_desc
+ # for the VideoStreaming Interface 0/1 Descriptors.
+
+ mkdir -p $GADGET/strings/0x409/iad_desc
+ echo -n "Interface Associaton Descriptor" > $GADGET/strings/0x409/iad_desc/s
+ ln -s $GADGET/strings/0x409/iad_desc $FUNCTION/iad_desc
+
+ # Because the link to a String Descriptor from an Extension Unit clearly
+ # associates the two, the name of this link is not significant and may
+ # be set freely.
+
+ mkdir -p $GADGET/strings/0x409/xu.0
+ echo -n "A Very Useful Extension Unit" > $GADGET/strings/0x409/xu.0/s
+ ln -s $GADGET/strings/0x409/xu.0 $FUNCTION/control/extensions/xu.0
+
+The interrupt endpoint
+~~~~~~~~~~~~~~~~~~~~~~
+
+The VideoControl interface has an optional interrupt endpoint which is by default
+disabled. This is intended to support delayed response control set requests for
+UVC (which should respond through the interrupt endpoint rather than tying up
+endpoint 0). At present support for sending data through this endpoint is missing
+and so it is left disabled to avoid confusion. If you wish to enable it you can
+do so through the configfs attribute:
+
+.. code-block:: bash
+
+ echo 1 > $FUNCTION/control/enable_interrupt_ep
+
+Bandwidth configuration
+~~~~~~~~~~~~~~~~~~~~~~~
+
+There are three attributes which control the bandwidth of the USB connection.
+These live in the function root and can be set within limits:
+
+.. code-block:: bash
+
+ # streaming_interval sets bInterval. Values range from 1..255
+ echo 1 > $FUNCTION/streaming_interval
+
+ # streaming_maxpacket sets wMaxPacketSize. Valid values are 1024/2048/3072
+ echo 3072 > $FUNCTION/streaming_maxpacket
+
+ # streaming_maxburst sets bMaxBurst. Valid values are 1..15
+ echo 1 > $FUNCTION/streaming_maxburst
+
+
+The values passed here will be clamped to valid values according to the UVC
+specification (which depend on the speed of the USB connection). To understand
+how the settings influence bandwidth you should consult the UVC specifications,
+but a rule of thumb is that increasing the streaming_maxpacket setting will
+improve bandwidth (and thus the maximum possible framerate), whilst the same is
+true for streaming_maxburst provided the USB connection is running at SuperSpeed.
+Increasing streaming_interval will reduce bandwidth and framerate.
+
+The userspace application
+-------------------------
+By itself, the UVC Gadget driver cannot do anything particularly interesting. It
+must be paired with a userspace program that responds to UVC control requests and
+fills buffers to be queued to the V4L2 device that the driver creates. How those
+things are achieved is implementation dependent and beyond the scope of this
+document, but a reference application can be found at https://gitlab.freedesktop.org/camera/uvc-gadget
diff --git a/Documentation/usb/index.rst b/Documentation/usb/index.rst
index b656c9b..27955da 100644
--- a/Documentation/usb/index.rst
+++ b/Documentation/usb/index.rst
@@ -16,6 +16,7 @@
gadget_multi
gadget_printer
gadget_serial
+ gadget_uvc
gadget-testing
iuu_phoenix
mass-storage
diff --git a/Documentation/userspace-api/netlink/specs.rst b/Documentation/userspace-api/netlink/specs.rst
index 2122e0c..a22442b 100644
--- a/Documentation/userspace-api/netlink/specs.rst
+++ b/Documentation/userspace-api/netlink/specs.rst
@@ -24,7 +24,8 @@
This document describes details of the schema.
See :doc:`intro-specs` for a practical starting guide.
-All specs must be licensed under ``GPL-2.0-only OR BSD-3-Clause``
+All specs must be licensed under
+``((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)``
to allow for easy adoption in user space code.
Compatibility levels
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 48fad65..e0c63de 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6030,6 +6030,44 @@
The "pad" and "reserved" fields may be used for future extensions and should be
set to 0s by userspace.
+4.138 KVM_ARM_SET_COUNTER_OFFSET
+--------------------------------
+
+:Capability: KVM_CAP_COUNTER_OFFSET
+:Architectures: arm64
+:Type: vm ioctl
+:Parameters: struct kvm_arm_counter_offset (in)
+:Returns: 0 on success, < 0 on error
+
+This capability indicates that userspace is able to apply a single VM-wide
+offset to both the virtual and physical counters as viewed by the guest
+using the KVM_ARM_SET_COUNTER_OFFSET ioctl and the following data structure:
+
+::
+
+ struct kvm_arm_counter_offset {
+ __u64 counter_offset;
+ __u64 reserved;
+ };
+
+The offset describes a number of counter cycles that are subtracted from
+both virtual and physical counter views (similar to the effects of the
+CNTVOFF_EL2 and CNTPOFF_EL2 system registers, but only global). The offset
+always applies to all vcpus (already created or created after this ioctl)
+for this VM.
+
+It is userspace's responsibility to compute the offset based, for example,
+on previous values of the guest counters.
+
+Any value other than 0 for the "reserved" field may result in an error
+(-EINVAL) being returned. This ioctl can also return -EBUSY if any vcpu
+ioctl is issued concurrently.
+
+Note that using this ioctl results in KVM ignoring subsequent userspace
+writes to the CNTVCT_EL0 and CNTPCT_EL0 registers using the SET_ONE_REG
+interface. No error will be returned, but the resulting offset will not be
+applied.
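+
+As a minimal illustrative sketch (assuming the usual <linux/kvm.h>
+definitions and ``vm_fd`` being an open VM file descriptor), userspace could
+apply a fixed offset roughly as follows::
+
+  struct kvm_arm_counter_offset off = {
+    .counter_offset = 1000000000ULL, /* cycles subtracted from both views */
+    .reserved       = 0,             /* must be zero */
+  };
+
+  if (ioctl(vm_fd, KVM_ARM_SET_COUNTER_OFFSET, &off) < 0)
+    perror("KVM_ARM_SET_COUNTER_OFFSET");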
+
5. The kvm_run structure
========================
@@ -6219,15 +6257,40 @@
__u64 nr;
__u64 args[6];
__u64 ret;
- __u32 longmode;
- __u32 pad;
+ __u64 flags;
} hypercall;
-Unused. This was once used for 'hypercall to userspace'. To implement
-such functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390).
+
+It is strongly recommended that userspace use ``KVM_EXIT_IO`` (x86) or
+``KVM_EXIT_MMIO`` (all except s390) to implement functionality that
+requires a guest to interact with host userspace.
.. note:: KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO.
+For arm64:
+----------
+
+SMCCC exits can be enabled depending on the configuration of the SMCCC
+filter. See the ``KVM_ARM_VM_SMCCC_FILTER`` section of
+Documentation/virt/kvm/devices/vm.rst for more details.
+
+``nr`` contains the function ID of the guest's SMCCC call. Userspace is
+expected to use the ``KVM_GET_ONE_REG`` ioctl to retrieve the call
+parameters from the vCPU's GPRs.
+
+Definition of ``flags``:
+ - ``KVM_HYPERCALL_EXIT_SMC``: Indicates that the guest used the SMC
+ conduit to initiate the SMCCC call. If this bit is 0 then the guest
+ used the HVC conduit for the SMCCC call.
+
+ - ``KVM_HYPERCALL_EXIT_16BIT``: Indicates that the guest used a 16bit
+ instruction to initiate the SMCCC call. If this bit is 0 then the
+ guest used a 32bit instruction. An AArch64 guest always has this
+ bit set to 0.
+
+At the point of exit, PC points to the instruction immediately following
+the trapping instruction.
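+
+As a purely illustrative sketch (``run`` is the mapped ``struct kvm_run``,
+and ``vcpu_fd`` and ``handle_guest_smccc()`` are hypothetical VMM plumbing),
+such an exit could be dispatched roughly as follows::
+
+  if (run->exit_reason == KVM_EXIT_HYPERCALL) {
+    bool used_smc = run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC;
+
+    /* 'nr' is the SMCCC function ID; GPRs are fetched via KVM_GET_ONE_REG */
+    handle_guest_smccc(vcpu_fd, run->hypercall.nr, used_smc);
+  }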
+
::
/* KVM_EXIT_TPR_ACCESS */
@@ -7267,6 +7330,7 @@
will clear DR6.RTM.
7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
+--------------------------------------
:Architectures: x86, arm64, mips
:Parameters: args[0] whether feature should be enabled or not
diff --git a/Documentation/virt/kvm/devices/vm.rst b/Documentation/virt/kvm/devices/vm.rst
index 147efec6..9d726e6 100644
--- a/Documentation/virt/kvm/devices/vm.rst
+++ b/Documentation/virt/kvm/devices/vm.rst
@@ -321,3 +321,82 @@
if it is enabled
:Returns: -EFAULT if the given address is not accessible from kernel space;
0 in case of success.
+
+6. GROUP: KVM_ARM_VM_SMCCC_CTRL
+===============================
+
+:Architectures: arm64
+
+6.1. ATTRIBUTE: KVM_ARM_VM_SMCCC_FILTER (w/o)
+---------------------------------------------
+
+:Parameters: Pointer to a ``struct kvm_smccc_filter``
+
+:Returns:
+
+ ====== ===========================================
+ EEXIST Range intersects with a previously inserted
+ or reserved range
+ EBUSY A vCPU in the VM has already run
+ EINVAL Invalid filter configuration
+ ENOMEM Failed to allocate memory for the in-kernel
+ representation of the SMCCC filter
+ ====== ===========================================
+
+Requests the installation of an SMCCC call filter described as follows::
+
+ enum kvm_smccc_filter_action {
+ KVM_SMCCC_FILTER_HANDLE = 0,
+ KVM_SMCCC_FILTER_DENY,
+ KVM_SMCCC_FILTER_FWD_TO_USER,
+ };
+
+ struct kvm_smccc_filter {
+ __u32 base;
+ __u32 nr_functions;
+ __u8 action;
+ __u8 pad[15];
+ };
+
+The filter is defined as a set of non-overlapping ranges. Each
+range defines an action to be applied to SMCCC calls within the range.
+Userspace can insert multiple ranges into the filter by using
+successive calls to this attribute.
+
+The default configuration of KVM is such that all implemented SMCCC
+calls are allowed. Thus, the SMCCC filter can be defined sparsely
+by userspace, only describing ranges that modify the default behavior.
+
+The range expressed by ``struct kvm_smccc_filter`` is
+[``base``, ``base + nr_functions``). The range is not allowed to wrap,
+i.e. userspace cannot rely on ``base + nr_functions`` overflowing.
+
+The SMCCC filter applies to both SMC and HVC calls initiated by the
+guest. The SMCCC filter gates the in-kernel emulation of SMCCC calls
+and as such takes effect before other interfaces that interact with
+SMCCC calls (e.g. hypercall bitmap registers).
+
+Actions:
+
+ - ``KVM_SMCCC_FILTER_HANDLE``: Allows the guest SMCCC call to be
+ handled in-kernel. It is strongly recommended that userspace *not*
+ explicitly describe the allowed SMCCC call ranges.
+
+ - ``KVM_SMCCC_FILTER_DENY``: Rejects the guest SMCCC call in-kernel
+ and returns to the guest.
+
+ - ``KVM_SMCCC_FILTER_FWD_TO_USER``: The guest SMCCC call is forwarded
+ to userspace with an exit reason of ``KVM_EXIT_HYPERCALL``.
+
+The ``pad`` field is reserved for future use and must be zero. KVM may
+return ``-EINVAL`` if the field is nonzero.
+
+KVM reserves the 'Arm Architecture Calls' range of function IDs and
+will reject attempts to define a filter for any portion of these ranges:
+
+ =========== ===============
+ Start End (inclusive)
+ =========== ===============
+ 0x8000_0000 0x8000_FFFF
+ 0xC000_0000 0xC000_FFFF
+ =========== ===============
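+
+As an illustrative sketch only (the function-ID range below is hypothetical
+and ``vm_fd`` is assumed to be an open VM file descriptor), a filter entry
+could be installed through the VM device-attribute interface roughly as::
+
+  struct kvm_smccc_filter filter = {
+    .base         = 0xC6000000,   /* hypothetical vendor-specific range */
+    .nr_functions = 0x10000,
+    .action       = KVM_SMCCC_FILTER_FWD_TO_USER,
+  };
+  struct kvm_device_attr attr = {
+    .group = KVM_ARM_VM_SMCCC_CTRL,
+    .attr  = KVM_ARM_VM_SMCCC_FILTER,
+    .addr  = (__u64)(unsigned long)&filter,
+  };
+
+  if (ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
+    perror("KVM_ARM_VM_SMCCC_FILTER");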
diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst
index 14c4e9f..8c77554 100644
--- a/Documentation/virt/kvm/locking.rst
+++ b/Documentation/virt/kvm/locking.rst
@@ -21,7 +21,7 @@
- kvm->mn_active_invalidate_count ensures that pairs of
invalidate_range_start() and invalidate_range_end() callbacks
use the same memslots array. kvm->slots_lock and kvm->slots_arch_lock
- are taken on the waiting side in install_new_memslots, so MMU notifiers
+ are taken on the waiting side when modifying memslots, so MMU notifiers
must not take either kvm->slots_lock or kvm->slots_arch_lock.
For SRCU:
diff --git a/MAINTAINERS b/MAINTAINERS
index ec57c42..1dc8bd2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5971,7 +5971,7 @@
F: include/uapi/linux/dm-*.h
DEVLINK
-M: Jiri Pirko <jiri@nvidia.com>
+M: Jiri Pirko <jiri@resnulli.us>
L: netdev@vger.kernel.org
S: Supported
F: Documentation/networking/devlink
@@ -14872,12 +14872,12 @@
L: linux-nvme@lists.infradead.org
S: Supported
W: http://git.infradead.org/nvme.git
-T: git://git.infradead.org/nvme.git
+T: git git://git.infradead.org/nvme.git
F: Documentation/nvme/
-F: drivers/nvme/host/
F: drivers/nvme/common/
-F: include/linux/nvme.h
+F: drivers/nvme/host/
F: include/linux/nvme-*.h
+F: include/linux/nvme.h
F: include/uapi/linux/nvme_ioctl.h
NVM EXPRESS FABRICS AUTHENTICATION
@@ -14912,7 +14912,7 @@
L: linux-nvme@lists.infradead.org
S: Supported
W: http://git.infradead.org/nvme.git
-T: git://git.infradead.org/nvme.git
+T: git git://git.infradead.org/nvme.git
F: drivers/nvme/target/
NVMEM FRAMEWORK
@@ -15079,7 +15079,7 @@
F: drivers/hwmon/nzxt-smart2.c
OBJAGG
-M: Jiri Pirko <jiri@nvidia.com>
+M: Jiri Pirko <jiri@resnulli.us>
L: netdev@vger.kernel.org
S: Supported
F: include/linux/objagg.h
@@ -15853,7 +15853,7 @@
F: include/linux/hp_sdc.h
PARMAN
-M: Jiri Pirko <jiri@nvidia.com>
+M: Jiri Pirko <jiri@resnulli.us>
L: netdev@vger.kernel.org
S: Supported
F: include/linux/parman.h
@@ -17990,7 +17990,7 @@
F: Documentation/devicetree/bindings/usb/microchip,mpfs-musb.yaml
F: arch/riscv/boot/dts/microchip/
F: drivers/char/hw_random/mpfs-rng.c
-F: drivers/clk/microchip/clk-mpfs.c
+F: drivers/clk/microchip/clk-mpfs*.c
F: drivers/i2c/busses/i2c-microchip-corei2c.c
F: drivers/mailbox/mailbox-mpfs.c
F: drivers/pci/controller/pcie-microchip-host.c
@@ -19150,9 +19150,7 @@
F: drivers/net/ethernet/sis/sis900.*
SIS FRAMEBUFFER DRIVER
-M: Thomas Winischhofer <thomas@winischhofer.net>
-S: Maintained
-W: http://www.winischhofer.net/linuxsisvga.shtml
+S: Orphan
F: Documentation/fb/sisfb.rst
F: drivers/video/fbdev/sis/
F: include/video/sisfb.h
@@ -21645,6 +21643,7 @@
M: Valentina Manea <valentina.manea.m@gmail.com>
M: Shuah Khan <shuah@kernel.org>
M: Shuah Khan <skhan@linuxfoundation.org>
+R: Hongren Zheng <i@zenithal.me>
L: linux-usb@vger.kernel.org
S: Maintained
F: Documentation/usb/usbip_protocol.rst
diff --git a/Makefile b/Makefile
index c933ceb..da2586d 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 3
SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc4
NAME = Hurr durr I'ma ninja sloth
# *DOCUMENTATION*
@@ -274,8 +274,7 @@
cscope gtags TAGS tags help% %docs check% coccicheck \
$(version_h) headers headers_% archheaders archscripts \
%asm-generic kernelversion %src-pkg dt_binding_check \
- outputmakefile rustavailable rustfmt rustfmtcheck \
- scripts_package
+ outputmakefile rustavailable rustfmt rustfmtcheck
# Installation targets should not require compiler. Unfortunately, vdso_install
# is an exception where build artifacts may be updated. This must be fixed.
no-compiler-targets := $(no-dot-config-targets) install dtbs_install \
@@ -1605,7 +1604,7 @@
certs/signing_key.pem \
certs/x509.genkey \
vmlinux-gdb.py \
- *.spec \
+ *.spec rpmbuild \
rust/libmacros.so
# clean - Delete most, but leave enough to build external modules
@@ -1656,10 +1655,6 @@
%pkg: include/config/kernel.release FORCE
$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.package $@
-PHONY += scripts_package
-scripts_package: scripts_basic
- $(Q)$(MAKE) $(build)=scripts scripts/list-gitignored
-
# Brief documentation of the typical targets used
# ---------------------------------------------------------------------------
@@ -1886,6 +1881,8 @@
else # KBUILD_EXTMOD
+filechk_kernel.release = echo $(KERNELRELEASE)
+
###
# External module support.
# When building external modules the kernel used as basis is considered
diff --git a/arch/arm/boot/dts/e60k02.dtsi b/arch/arm/boot/dts/e60k02.dtsi
index 94944cc..dd03e38 100644
--- a/arch/arm/boot/dts/e60k02.dtsi
+++ b/arch/arm/boot/dts/e60k02.dtsi
@@ -311,6 +311,7 @@ &usdhc3 {
&usbotg1 {
pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usbotg1>;
disable-over-current;
srp-disable;
hnp-disable;
diff --git a/arch/arm/boot/dts/e70k02.dtsi b/arch/arm/boot/dts/e70k02.dtsi
index ace3eb8..4e1bf08 100644
--- a/arch/arm/boot/dts/e70k02.dtsi
+++ b/arch/arm/boot/dts/e70k02.dtsi
@@ -321,6 +321,7 @@ &usdhc3 {
&usbotg1 {
pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usbotg1>;
disable-over-current;
srp-disable;
hnp-disable;
diff --git a/arch/arm/boot/dts/imx6sl-tolino-shine2hd.dts b/arch/arm/boot/dts/imx6sl-tolino-shine2hd.dts
index da13990..815119c 100644
--- a/arch/arm/boot/dts/imx6sl-tolino-shine2hd.dts
+++ b/arch/arm/boot/dts/imx6sl-tolino-shine2hd.dts
@@ -625,6 +625,7 @@ &usdhc3 {
&usbotg1 {
pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usbotg1>;
disable-over-current;
srp-disable;
hnp-disable;
diff --git a/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts b/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts
index de2fb1c..b823812 100644
--- a/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts
+++ b/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts
@@ -27,6 +27,16 @@ chosen {
};
reserved-memory {
+ sbl_region: sbl@2f00000 {
+ reg = <0x02f00000 0x100000>;
+ no-map;
+ };
+
+ external_image_region: external-image@3100000 {
+ reg = <0x03100000 0x200000>;
+ no-map;
+ };
+
adsp_region: adsp@3300000 {
reg = <0x03300000 0x1400000>;
no-map;
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
index 14eecaa..e4c2677 100644
--- a/arch/arm/lib/uaccess_with_memcpy.c
+++ b/arch/arm/lib/uaccess_with_memcpy.c
@@ -116,7 +116,7 @@ __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
tocopy = n;
ua_flags = uaccess_save_and_enable();
- memcpy((void *)to, from, tocopy);
+ __memcpy((void *)to, from, tocopy);
uaccess_restore(ua_flags);
to += tocopy;
from += tocopy;
@@ -178,7 +178,7 @@ __clear_user_memset(void __user *addr, unsigned long n)
tocopy = n;
ua_flags = uaccess_save_and_enable();
- memset((void *)addr, 0, tocopy);
+ __memset((void *)addr, 0, tocopy);
uaccess_restore(ua_flags);
addr += tocopy;
n -= tocopy;
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-kbox-a-230-ls.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-kbox-a-230-ls.dts
index af9194e..73eb606 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-kbox-a-230-ls.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-kbox-a-230-ls.dts
@@ -56,14 +56,10 @@ qsgmii_phy3: ethernet-phy@10 {
};
&enetc_port2 {
- nvmem-cells = <&base_mac_address 2>;
- nvmem-cell-names = "mac-address";
status = "okay";
};
&enetc_port3 {
- nvmem-cells = <&base_mac_address 3>;
- nvmem-cell-names = "mac-address";
status = "okay";
};
@@ -84,8 +80,6 @@ &mscc_felix_port0 {
managed = "in-band-status";
phy-handle = <&qsgmii_phy0>;
phy-mode = "qsgmii";
- nvmem-cells = <&base_mac_address 4>;
- nvmem-cell-names = "mac-address";
status = "okay";
};
@@ -94,8 +88,6 @@ &mscc_felix_port1 {
managed = "in-band-status";
phy-handle = <&qsgmii_phy1>;
phy-mode = "qsgmii";
- nvmem-cells = <&base_mac_address 5>;
- nvmem-cell-names = "mac-address";
status = "okay";
};
@@ -104,8 +96,6 @@ &mscc_felix_port2 {
managed = "in-band-status";
phy-handle = <&qsgmii_phy2>;
phy-mode = "qsgmii";
- nvmem-cells = <&base_mac_address 6>;
- nvmem-cell-names = "mac-address";
status = "okay";
};
@@ -114,8 +104,6 @@ &mscc_felix_port3 {
managed = "in-band-status";
phy-handle = <&qsgmii_phy3>;
phy-mode = "qsgmii";
- nvmem-cells = <&base_mac_address 7>;
- nvmem-cell-names = "mac-address";
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts
index 1f34c75..7cd29ab 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts
@@ -55,7 +55,5 @@ &enetc_port0 {
&enetc_port1 {
phy-handle = <&phy0>;
phy-mode = "rgmii-id";
- nvmem-cells = <&base_mac_address 0>;
- nvmem-cell-names = "mac-address";
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts
index aac4119..113b1df 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts
@@ -36,14 +36,10 @@ &enetc_port0 {
};
&enetc_port2 {
- nvmem-cells = <&base_mac_address 2>;
- nvmem-cell-names = "mac-address";
status = "okay";
};
&enetc_port3 {
- nvmem-cells = <&base_mac_address 3>;
- nvmem-cell-names = "mac-address";
status = "okay";
};
@@ -56,8 +52,6 @@ &mscc_felix_port0 {
managed = "in-band-status";
phy-handle = <&phy0>;
phy-mode = "sgmii";
- nvmem-cells = <&base_mac_address 0>;
- nvmem-cell-names = "mac-address";
status = "okay";
};
@@ -66,8 +60,6 @@ &mscc_felix_port1 {
managed = "in-band-status";
phy-handle = <&phy1>;
phy-mode = "sgmii";
- nvmem-cells = <&base_mac_address 1>;
- nvmem-cell-names = "mac-address";
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts
index a4421db..9b5e92f 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts
@@ -43,7 +43,5 @@ vddh: vddh-regulator {
&enetc_port1 {
phy-handle = <&phy1>;
phy-mode = "rgmii-id";
- nvmem-cells = <&base_mac_address 1>;
- nvmem-cell-names = "mac-address";
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts
index 8b65af4..4ab17b98 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts
@@ -92,8 +92,6 @@ &enetc_port0 {
phy-handle = <&phy0>;
phy-mode = "sgmii";
managed = "in-band-status";
- nvmem-cells = <&base_mac_address 0>;
- nvmem-cell-names = "mac-address";
status = "okay";
};
@@ -156,21 +154,6 @@ partition@3e0000 {
label = "bootloader environment";
};
};
-
- otp-1 {
- compatible = "user-otp";
-
- nvmem-layout {
- compatible = "kontron,sl28-vpd";
-
- serial_number: serial-number {
- };
-
- base_mac_address: base-mac-address {
- #nvmem-cell-cells = <1>;
- };
- };
- };
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
index 1f3d225..06b94bb 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
@@ -117,7 +117,7 @@ flexspi0: spi@5d120000 {
interrupts = <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX_SC_R_FSPI_0 IMX_SC_PM_CLK_PER>,
<&clk IMX_SC_R_FSPI_0 IMX_SC_PM_CLK_PER>;
- clock-names = "fspi", "fspi_en";
+ clock-names = "fspi_en", "fspi";
power-domains = <&pd IMX_SC_R_FSPI_0>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8dxl-evk.dts b/arch/arm64/boot/dts/freescale/imx8dxl-evk.dts
index 1bcf228..8524203 100644
--- a/arch/arm64/boot/dts/freescale/imx8dxl-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx8dxl-evk.dts
@@ -121,8 +121,6 @@ &eqos {
phy-handle = <ðphy0>;
nvmem-cells = <&fec_mac1>;
nvmem-cell-names = "mac-address";
- snps,reset-gpios = <&pca6416_1 2 GPIO_ACTIVE_LOW>;
- snps,reset-delays-us = <10 20 200000>;
status = "okay";
mdio {
@@ -136,6 +134,9 @@ ethphy0: ethernet-phy@0 {
eee-broken-1000t;
qca,disable-smarteee;
qca,disable-hibernation-mode;
+ reset-gpios = <&pca6416_1 2 GPIO_ACTIVE_LOW>;
+ reset-assert-us = <20>;
+ reset-deassert-us = <200000>;
vddio-supply = <&vddio0>;
vddio0: vddio-regulator {
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts b/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts
index 6357078..0e8f0d7 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts
@@ -247,7 +247,7 @@ wm8960: codec@1a {
compatible = "wlf,wm8960";
reg = <0x1a>;
clocks = <&clk IMX8MM_CLK_SAI1_ROOT>;
- clock-names = "mclk1";
+ clock-names = "mclk";
wlf,shared-lrclk;
#sound-dai-cells = <0>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi
index ed9ac6c..9e0ddd6 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi
@@ -296,6 +296,7 @@ spba2: spba-bus@30000000 {
sai2: sai@30020000 {
compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai";
reg = <0x30020000 0x10000>;
+ #sound-dai-cells = <0>;
interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MN_CLK_SAI2_IPG>,
<&clk IMX8MN_CLK_DUMMY>,
@@ -310,6 +311,7 @@ sai2: sai@30020000 {
sai3: sai@30030000 {
compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai";
reg = <0x30030000 0x10000>;
+ #sound-dai-cells = <0>;
interrupts = <GIC_SPI 50 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MN_CLK_SAI3_IPG>,
<&clk IMX8MN_CLK_DUMMY>,
@@ -324,6 +326,7 @@ sai3: sai@30030000 {
sai5: sai@30050000 {
compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai";
reg = <0x30050000 0x10000>;
+ #sound-dai-cells = <0>;
interrupts = <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MN_CLK_SAI5_IPG>,
<&clk IMX8MN_CLK_DUMMY>,
@@ -340,6 +343,7 @@ sai5: sai@30050000 {
sai6: sai@30060000 {
compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai";
reg = <0x30060000 0x10000>;
+ #sound-dai-cells = <0>;
interrupts = <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MN_CLK_SAI6_IPG>,
<&clk IMX8MN_CLK_DUMMY>,
@@ -397,6 +401,7 @@ spdif1: spdif@30090000 {
sai7: sai@300b0000 {
compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai";
reg = <0x300b0000 0x10000>;
+ #sound-dai-cells = <0>;
interrupts = <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MN_CLK_SAI7_IPG>,
<&clk IMX8MN_CLK_DUMMY>,
diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
index a19224f..2dd60e3 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
@@ -1131,8 +1131,8 @@ lcdif2: display-controller@32e90000 {
reg = <0x32e90000 0x238>;
interrupts = <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MP_CLK_MEDIA_DISP2_PIX_ROOT>,
- <&clk IMX8MP_CLK_MEDIA_AXI_ROOT>,
- <&clk IMX8MP_CLK_MEDIA_APB_ROOT>;
+ <&clk IMX8MP_CLK_MEDIA_APB_ROOT>,
+ <&clk IMX8MP_CLK_MEDIA_AXI_ROOT>;
clock-names = "pix", "axi", "disp_axi";
assigned-clocks = <&clk IMX8MP_CLK_MEDIA_DISP2_PIX>,
<&clk IMX8MP_VIDEO_PLL1>;
diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi
index 2076f9c..41efd97 100644
--- a/arch/arm64/boot/dts/freescale/imx93.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx93.dtsi
@@ -164,6 +164,8 @@ tpm2: pwm@44320000 {
lpi2c1: i2c@44340000 {
compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c";
reg = <0x44340000 0x10000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX93_CLK_LPI2C1_GATE>,
<&clk IMX93_CLK_BUS_AON>;
@@ -174,6 +176,8 @@ lpi2c1: i2c@44340000 {
lpi2c2: i2c@44350000 {
compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c";
reg = <0x44350000 0x10000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX93_CLK_LPI2C2_GATE>,
<&clk IMX93_CLK_BUS_AON>;
@@ -343,6 +347,8 @@ tpm6: pwm@42510000 {
lpi2c3: i2c@42530000 {
compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c";
reg = <0x42530000 0x10000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
interrupts = <GIC_SPI 62 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX93_CLK_LPI2C3_GATE>,
<&clk IMX93_CLK_BUS_WAKEUP>;
@@ -353,6 +359,8 @@ lpi2c3: i2c@42530000 {
lpi2c4: i2c@42540000 {
compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c";
reg = <0x42540000 0x10000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
interrupts = <GIC_SPI 63 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX93_CLK_LPI2C4_GATE>,
<&clk IMX93_CLK_BUS_WAKEUP>;
@@ -455,6 +463,8 @@ lpuart8: serial@426a0000 {
lpi2c5: i2c@426b0000 {
compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c";
reg = <0x426b0000 0x10000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
interrupts = <GIC_SPI 195 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX93_CLK_LPI2C5_GATE>,
<&clk IMX93_CLK_BUS_WAKEUP>;
@@ -465,6 +475,8 @@ lpi2c5: i2c@426b0000 {
lpi2c6: i2c@426c0000 {
compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c";
reg = <0x426c0000 0x10000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
interrupts = <GIC_SPI 196 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX93_CLK_LPI2C6_GATE>,
<&clk IMX93_CLK_BUS_WAKEUP>;
@@ -475,6 +487,8 @@ lpi2c6: i2c@426c0000 {
lpi2c7: i2c@426d0000 {
compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c";
reg = <0x426d0000 0x10000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
interrupts = <GIC_SPI 197 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX93_CLK_LPI2C7_GATE>,
<&clk IMX93_CLK_BUS_WAKEUP>;
@@ -485,6 +499,8 @@ lpi2c7: i2c@426d0000 {
lpi2c8: i2c@426e0000 {
compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c";
reg = <0x426e0000 0x10000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
interrupts = <GIC_SPI 198 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX93_CLK_LPI2C8_GATE>,
<&clk IMX93_CLK_BUS_WAKEUP>;
@@ -580,9 +596,9 @@ usdhc2: mmc@42860000 {
eqos: ethernet@428a0000 {
compatible = "nxp,imx93-dwmac-eqos", "snps,dwmac-5.10a";
reg = <0x428a0000 0x10000>;
- interrupts = <GIC_SPI 183 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 184 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "eth_wake_irq", "macirq";
+ interrupts = <GIC_SPI 184 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 183 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_wake_irq";
clocks = <&clk IMX93_CLK_ENET_QOS_GATE>,
<&clk IMX93_CLK_ENET_QOS_GATE>,
<&clk IMX93_CLK_ENET_TIMER2>,
@@ -595,7 +611,7 @@ eqos: ethernet@428a0000 {
<&clk IMX93_CLK_SYS_PLL_PFD0_DIV2>;
assigned-clock-rates = <100000000>, <250000000>;
intf_mode = <&wakeupmix_gpr 0x28>;
- clk_csr = <0>;
+ snps,clk-csr = <0>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
index 133dbe5..7096b99 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
@@ -22,7 +22,7 @@ bus@0 {
#address-cells = <2>;
#size-cells = <2>;
- ranges = <0x0 0x0 0x0 0x0 0x0 0x40000000>;
+ ranges = <0x0 0x0 0x0 0x0 0x100 0x0>;
apbmisc: misc@100000 {
compatible = "nvidia,tegra194-misc";
diff --git a/arch/arm64/boot/dts/nvidia/tegra234.dtsi b/arch/arm64/boot/dts/nvidia/tegra234.dtsi
index 8fe8eda..f1748cf 100644
--- a/arch/arm64/boot/dts/nvidia/tegra234.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra234.dtsi
@@ -20,7 +20,7 @@ bus@0 {
#address-cells = <2>;
#size-cells = <2>;
- ranges = <0x0 0x0 0x0 0x0 0x0 0x40000000>;
+ ranges = <0x0 0x0 0x0 0x0 0x100 0x0>;
misc@100000 {
compatible = "nvidia,tegra234-misc";
diff --git a/arch/arm64/boot/dts/qcom/msm8916-thwc-uf896.dts b/arch/arm64/boot/dts/qcom/msm8916-thwc-uf896.dts
index c492db8..82e2603 100644
--- a/arch/arm64/boot/dts/qcom/msm8916-thwc-uf896.dts
+++ b/arch/arm64/boot/dts/qcom/msm8916-thwc-uf896.dts
@@ -33,7 +33,3 @@ &button_default {
&gpio_leds_default {
pins = "gpio81", "gpio82", "gpio83";
};
-
-&sim_ctrl_default {
- pins = "gpio1", "gpio2";
-};
diff --git a/arch/arm64/boot/dts/qcom/msm8916-thwc-ufi001c.dts b/arch/arm64/boot/dts/qcom/msm8916-thwc-ufi001c.dts
index 700cf81..8433c97 100644
--- a/arch/arm64/boot/dts/qcom/msm8916-thwc-ufi001c.dts
+++ b/arch/arm64/boot/dts/qcom/msm8916-thwc-ufi001c.dts
@@ -25,6 +25,11 @@ &led_b {
gpios = <&msmgpio 20 GPIO_ACTIVE_HIGH>;
};
+&mpss {
+ pinctrl-0 = <&sim_ctrl_default>;
+ pinctrl-names = "default";
+};
+
&button_default {
pins = "gpio37";
bias-pull-down;
@@ -34,6 +39,25 @@ &gpio_leds_default {
pins = "gpio20", "gpio21", "gpio22";
};
-&sim_ctrl_default {
- pins = "gpio1", "gpio2";
+/* This selects the external SIM card slot by default */
+&msmgpio {
+ sim_ctrl_default: sim-ctrl-default-state {
+ esim-sel-pins {
+ pins = "gpio0", "gpio3";
+ bias-disable;
+ output-low;
+ };
+
+ sim-en-pins {
+ pins = "gpio1";
+ bias-disable;
+ output-low;
+ };
+
+ sim-sel-pins {
+ pins = "gpio2";
+ bias-disable;
+ output-high;
+ };
+ };
};
diff --git a/arch/arm64/boot/dts/qcom/msm8916-ufi.dtsi b/arch/arm64/boot/dts/qcom/msm8916-ufi.dtsi
index 790a969..cdf34b7 100644
--- a/arch/arm64/boot/dts/qcom/msm8916-ufi.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8916-ufi.dtsi
@@ -92,9 +92,6 @@ &gcc {
};
&mpss {
- pinctrl-0 = <&sim_ctrl_default>;
- pinctrl-names = "default";
-
status = "okay";
};
@@ -240,11 +237,4 @@ gpio_leds_default: gpio-leds-default-state {
drive-strength = <2>;
bias-disable;
};
-
- sim_ctrl_default: sim-ctrl-default-state {
- function = "gpio";
- drive-strength = <2>;
- bias-disable;
- output-low;
- };
};
diff --git a/arch/arm64/boot/dts/qcom/sa8540p-ride.dts b/arch/arm64/boot/dts/qcom/sa8540p-ride.dts
index 3ccb5ff..24fa449 100644
--- a/arch/arm64/boot/dts/qcom/sa8540p-ride.dts
+++ b/arch/arm64/boot/dts/qcom/sa8540p-ride.dts
@@ -241,7 +241,7 @@ &qup2 {
};
&remoteproc_nsp0 {
- firmware-name = "qcom/sa8540p/cdsp.mbn";
+ firmware-name = "qcom/sa8540p/cdsp0.mbn";
status = "okay";
};
diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi
index bdcb749..8f4ab6b 100644
--- a/arch/arm64/boot/dts/qcom/sc7280.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi
@@ -2131,6 +2131,8 @@ pcie1: pci@1c08000 {
pinctrl-names = "default";
pinctrl-0 = <&pcie1_clkreq_n>;
+ dma-coherent;
+
iommus = <&apps_smmu 0x1c80 0x1>;
iommu-map = <0x0 &apps_smmu 0x1c80 0x1>,
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
index 98e71b9..99c6d65 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
+++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
@@ -370,6 +370,7 @@ vreg_s10b: smps10 {
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-always-on;
};
vreg_s11b: smps11 {
@@ -377,6 +378,7 @@ vreg_s11b: smps11 {
regulator-min-microvolt = <1272000>;
regulator-max-microvolt = <1272000>;
regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-always-on;
};
vreg_s12b: smps12 {
@@ -384,6 +386,7 @@ vreg_s12b: smps12 {
regulator-min-microvolt = <984000>;
regulator-max-microvolt = <984000>;
regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-always-on;
};
vreg_l3b: ldo3 {
@@ -441,6 +444,7 @@ vreg_bob: bob {
regulator-min-microvolt = <3008000>;
regulator-max-microvolt = <3960000>;
regulator-initial-mode = <RPMH_REGULATOR_MODE_AUTO>;
+ regulator-always-on;
};
};
@@ -772,75 +776,88 @@ &pmk8280_vadc {
pmic-die-temp@3 {
reg = <PMK8350_ADC7_DIE_TEMP>;
qcom,pre-scaling = <1 1>;
+ label = "pmk8350_die_temp";
};
xo-therm@44 {
reg = <PMK8350_ADC7_AMUX_THM1_100K_PU>;
qcom,hw-settle-time = <200>;
qcom,ratiometric;
+ label = "pmk8350_xo_therm";
};
pmic-die-temp@103 {
reg = <PM8350_ADC7_DIE_TEMP(1)>;
qcom,pre-scaling = <1 1>;
+ label = "pmc8280_1_die_temp";
};
sys-therm@144 {
reg = <PM8350_ADC7_AMUX_THM1_100K_PU(1)>;
qcom,hw-settle-time = <200>;
qcom,ratiometric;
+ label = "sys_therm1";
};
sys-therm@145 {
reg = <PM8350_ADC7_AMUX_THM2_100K_PU(1)>;
qcom,hw-settle-time = <200>;
qcom,ratiometric;
+ label = "sys_therm2";
};
sys-therm@146 {
reg = <PM8350_ADC7_AMUX_THM3_100K_PU(1)>;
qcom,hw-settle-time = <200>;
qcom,ratiometric;
+ label = "sys_therm3";
};
sys-therm@147 {
reg = <PM8350_ADC7_AMUX_THM4_100K_PU(1)>;
qcom,hw-settle-time = <200>;
qcom,ratiometric;
+ label = "sys_therm4";
};
pmic-die-temp@303 {
reg = <PM8350_ADC7_DIE_TEMP(3)>;
qcom,pre-scaling = <1 1>;
+ label = "pmc8280_2_die_temp";
};
sys-therm@344 {
reg = <PM8350_ADC7_AMUX_THM1_100K_PU(3)>;
qcom,hw-settle-time = <200>;
qcom,ratiometric;
+ label = "sys_therm5";
};
sys-therm@345 {
reg = <PM8350_ADC7_AMUX_THM2_100K_PU(3)>;
qcom,hw-settle-time = <200>;
qcom,ratiometric;
+ label = "sys_therm6";
};
sys-therm@346 {
reg = <PM8350_ADC7_AMUX_THM3_100K_PU(3)>;
qcom,hw-settle-time = <200>;
qcom,ratiometric;
+ label = "sys_therm7";
};
sys-therm@347 {
reg = <PM8350_ADC7_AMUX_THM4_100K_PU(3)>;
qcom,hw-settle-time = <200>;
qcom,ratiometric;
+ label = "sys_therm8";
};
pmic-die-temp@403 {
reg = <PMR735A_ADC7_DIE_TEMP>;
qcom,pre-scaling = <1 1>;
+ label = "pmr735a_die_temp";
};
};
@@ -884,9 +901,9 @@ &sound {
"VA DMIC0", "MIC BIAS1",
"VA DMIC1", "MIC BIAS1",
"VA DMIC2", "MIC BIAS3",
- "TX DMIC0", "MIC BIAS1",
- "TX DMIC1", "MIC BIAS2",
- "TX DMIC2", "MIC BIAS3",
+ "VA DMIC0", "VA MIC BIAS1",
+ "VA DMIC1", "VA MIC BIAS1",
+ "VA DMIC2", "VA MIC BIAS3",
"TX SWR_ADC1", "ADC2_OUTPUT";
wcd-playback-dai-link {
@@ -937,7 +954,7 @@ platform {
va-dai-link {
link-name = "VA Capture";
cpu {
- sound-dai = <&q6apmbedai TX_CODEC_DMA_TX_3>;
+ sound-dai = <&q6apmbedai VA_CODEC_DMA_TX_0>;
};
platform {
@@ -1062,7 +1079,7 @@ &vamacro {
vdd-micb-supply = <&vreg_s10b>;
- qcom,dmic-sample-rate = <600000>;
+ qcom,dmic-sample-rate = <4800000>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi
index 0d02599..42bfa9f 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi
@@ -2504,12 +2504,12 @@ swr1: soundwire-controller@3210000 {
qcom,ports-sinterval-low = /bits/ 8 <0x03 0x1f 0x1f 0x07 0x00>;
qcom,ports-offset1 = /bits/ 8 <0x00 0x00 0x0B 0x01 0x00>;
qcom,ports-offset2 = /bits/ 8 <0x00 0x00 0x0B 0x00 0x00>;
- qcom,ports-hstart = /bits/ 8 <0xff 0x03 0xff 0xff 0xff>;
- qcom,ports-hstop = /bits/ 8 <0xff 0x06 0xff 0xff 0xff>;
+ qcom,ports-hstart = /bits/ 8 <0xff 0x03 0x00 0xff 0xff>;
+ qcom,ports-hstop = /bits/ 8 <0xff 0x06 0x0f 0xff 0xff>;
qcom,ports-word-length = /bits/ 8 <0x01 0x07 0x04 0xff 0xff>;
- qcom,ports-block-pack-mode = /bits/ 8 <0xff 0x00 0x01 0xff 0xff>;
+ qcom,ports-block-pack-mode = /bits/ 8 <0xff 0xff 0x01 0xff 0xff>;
qcom,ports-lane-control = /bits/ 8 <0x01 0x00 0x00 0x00 0x00>;
- qcom,ports-block-group-count = /bits/ 8 <0xff 0xff 0xff 0xff 0x00>;
+ qcom,ports-block-group-count = /bits/ 8 <0xff 0xff 0xff 0xff 0xff>;
#sound-dai-cells = <1>;
#address-cells = <2>;
@@ -2600,7 +2600,7 @@ swr2: soundwire-controller@3330000 {
<&intc GIC_SPI 520 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "core", "wake";
- clocks = <&vamacro>;
+ clocks = <&txmacro>;
clock-names = "iface";
label = "TX";
#sound-dai-cells = <1>;
@@ -2609,15 +2609,15 @@ swr2: soundwire-controller@3330000 {
qcom,din-ports = <4>;
qcom,dout-ports = <0>;
- qcom,ports-sinterval-low = /bits/ 8 <0x01 0x03 0x03 0x03>;
- qcom,ports-offset1 = /bits/ 8 <0x01 0x00 0x02 0x01>;
+ qcom,ports-sinterval-low = /bits/ 8 <0x01 0x01 0x03 0x03>;
+ qcom,ports-offset1 = /bits/ 8 <0x01 0x00 0x02 0x00>;
qcom,ports-offset2 = /bits/ 8 <0x00 0x00 0x00 0x00>;
qcom,ports-block-pack-mode = /bits/ 8 <0xff 0xff 0xff 0xff>;
qcom,ports-hstart = /bits/ 8 <0xff 0xff 0xff 0xff>;
qcom,ports-hstop = /bits/ 8 <0xff 0xff 0xff 0xff>;
- qcom,ports-word-length = /bits/ 8 <0xff 0x00 0xff 0xff>;
+ qcom,ports-word-length = /bits/ 8 <0xff 0xff 0xff 0xff>;
qcom,ports-block-group-count = /bits/ 8 <0xff 0xff 0xff 0xff>;
- qcom,ports-lane-control = /bits/ 8 <0x00 0x01 0x00 0x00>;
+ qcom,ports-lane-control = /bits/ 8 <0x00 0x01 0x00 0x01>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/qcom/sm6115.dtsi b/arch/arm64/boot/dts/qcom/sm6115.dtsi
index 4d6ec81..fbd67d2 100644
--- a/arch/arm64/boot/dts/qcom/sm6115.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm6115.dtsi
@@ -1078,6 +1078,7 @@ spi5: spi@4a94000 {
dma-names = "tx", "rx";
#address-cells = <1>;
#size-cells = <0>;
+ status = "disabled";
};
};
diff --git a/arch/arm64/boot/dts/qcom/sm6375.dtsi b/arch/arm64/boot/dts/qcom/sm6375.dtsi
index 31b88c7..068ee4f 100644
--- a/arch/arm64/boot/dts/qcom/sm6375.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm6375.dtsi
@@ -1209,6 +1209,7 @@ remoteproc_cdsp: remoteproc@b000000 {
clock-names = "xo";
power-domains = <&rpmpd SM6375_VDDCX>;
+ power-domain-names = "cx";
memory-region = <&pil_cdsp_mem>;
diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi
index fd20096..13e0ce8 100644
--- a/arch/arm64/boot/dts/qcom/sm8150.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi
@@ -1826,7 +1826,7 @@ pcie0: pci@1c00000 {
"slave_q2a",
"tbu";
- iommus = <&apps_smmu 0x1d80 0x7f>;
+ iommus = <&apps_smmu 0x1d80 0x3f>;
iommu-map = <0x0 &apps_smmu 0x1d80 0x1>,
<0x100 &apps_smmu 0x1d81 0x1>;
@@ -1925,7 +1925,7 @@ pcie1: pci@1c08000 {
assigned-clocks = <&gcc GCC_PCIE_1_AUX_CLK>;
assigned-clock-rates = <19200000>;
- iommus = <&apps_smmu 0x1e00 0x7f>;
+ iommus = <&apps_smmu 0x1e00 0x3f>;
iommu-map = <0x0 &apps_smmu 0x1e00 0x1>,
<0x100 &apps_smmu 0x1e01 0x1>;
diff --git a/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish.dts b/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish.dts
index acaa99c5..a85d47f 100644
--- a/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish.dts
+++ b/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish.dts
@@ -625,6 +625,6 @@ &ufs_mem_phy {
};
&venus {
- firmware-name = "qcom/sm8250/elish/venus.mbn";
+ firmware-name = "qcom/sm8250/xiaomi/elish/venus.mbn";
status = "okay";
};
diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi
index 1c97e28..1a5a612 100644
--- a/arch/arm64/boot/dts/qcom/sm8350.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi
@@ -1664,6 +1664,7 @@ ufs_mem_hc: ufshc@1d84000 {
power-domains = <&gcc UFS_PHY_GDSC>;
iommus = <&apps_smmu 0xe0 0x0>;
+ dma-coherent;
clock-names =
"core_clk",
diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi
index 1a744a3..b285b15 100644
--- a/arch/arm64/boot/dts/qcom/sm8450.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi
@@ -2143,8 +2143,8 @@ wsa2macro: codec@31e0000 {
<&q6prmcc LPASS_HW_DCODEC_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
<&vamacro>;
clock-names = "mclk", "npl", "macro", "dcodec", "fsgen";
- assigned-clocks = <&q6prmcc LPASS_CLK_ID_WSA_CORE_TX_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
- <&q6prmcc LPASS_CLK_ID_WSA_CORE_TX_2X_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>;
+ assigned-clocks = <&q6prmcc LPASS_CLK_ID_WSA2_CORE_TX_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+ <&q6prmcc LPASS_CLK_ID_WSA2_CORE_TX_2X_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>;
assigned-clock-rates = <19200000>, <19200000>;
#clock-cells = <0>;
@@ -4003,6 +4003,7 @@ ufs_mem_hc: ufshc@1d84000 {
power-domains = <&gcc UFS_PHY_GDSC>;
iommus = <&apps_smmu 0xe0 0x0>;
+ dma-coherent;
interconnects = <&aggre1_noc MASTER_UFS_MEM 0 &mc_virt SLAVE_EBI1 0>,
<&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_UFS_MEM_CFG 0>;
diff --git a/arch/arm64/boot/dts/qcom/sm8550.dtsi b/arch/arm64/boot/dts/qcom/sm8550.dtsi
index ff4d342..5d08883 100644
--- a/arch/arm64/boot/dts/qcom/sm8550.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8550.dtsi
@@ -66,7 +66,7 @@ cpus {
CPU0: cpu@0 {
device_type = "cpu";
- compatible = "qcom,kryo";
+ compatible = "arm,cortex-a510";
reg = <0 0>;
enable-method = "psci";
next-level-cache = <&L2_0>;
@@ -89,7 +89,7 @@ L3_0: l3-cache {
CPU1: cpu@100 {
device_type = "cpu";
- compatible = "qcom,kryo";
+ compatible = "arm,cortex-a510";
reg = <0 0x100>;
enable-method = "psci";
next-level-cache = <&L2_100>;
@@ -108,7 +108,7 @@ L2_100: l2-cache {
CPU2: cpu@200 {
device_type = "cpu";
- compatible = "qcom,kryo";
+ compatible = "arm,cortex-a510";
reg = <0 0x200>;
enable-method = "psci";
next-level-cache = <&L2_200>;
@@ -127,7 +127,7 @@ L2_200: l2-cache {
CPU3: cpu@300 {
device_type = "cpu";
- compatible = "qcom,kryo";
+ compatible = "arm,cortex-a715";
reg = <0 0x300>;
enable-method = "psci";
next-level-cache = <&L2_300>;
@@ -146,7 +146,7 @@ L2_300: l2-cache {
CPU4: cpu@400 {
device_type = "cpu";
- compatible = "qcom,kryo";
+ compatible = "arm,cortex-a715";
reg = <0 0x400>;
enable-method = "psci";
next-level-cache = <&L2_400>;
@@ -165,7 +165,7 @@ L2_400: l2-cache {
CPU5: cpu@500 {
device_type = "cpu";
- compatible = "qcom,kryo";
+ compatible = "arm,cortex-a710";
reg = <0 0x500>;
enable-method = "psci";
next-level-cache = <&L2_500>;
@@ -184,7 +184,7 @@ L2_500: l2-cache {
CPU6: cpu@600 {
device_type = "cpu";
- compatible = "qcom,kryo";
+ compatible = "arm,cortex-a710";
reg = <0 0x600>;
enable-method = "psci";
next-level-cache = <&L2_600>;
@@ -203,7 +203,7 @@ L2_600: l2-cache {
CPU7: cpu@700 {
device_type = "cpu";
- compatible = "qcom,kryo";
+ compatible = "arm,cortex-x3";
reg = <0 0x700>;
enable-method = "psci";
next-level-cache = <&L2_700>;
@@ -1905,6 +1905,7 @@ ufs_mem_hc: ufs@1d84000 {
required-opps = <&rpmhpd_opp_nom>;
iommus = <&apps_smmu 0x60 0x0>;
+ dma-coherent;
interconnects = <&aggre1_noc MASTER_UFS_MEM 0 &mc_virt SLAVE_EBI1 0>,
<&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_UFS_MEM_CFG 0>;
@@ -1997,7 +1998,7 @@ IPCC_MPROC_SIGNAL_GLINK_QMP
lpass_tlmm: pinctrl@6e80000 {
compatible = "qcom,sm8550-lpass-lpi-pinctrl";
reg = <0 0x06e80000 0 0x20000>,
- <0 0x0725a000 0 0x10000>;
+ <0 0x07250000 0 0x10000>;
gpio-controller;
#gpio-cells = <2>;
gpio-ranges = <&lpass_tlmm 0 0 23>;
@@ -2691,7 +2692,7 @@ qup_i2c0_data_clk: qup-i2c0-data-clk-state {
pins = "gpio28", "gpio29";
function = "qup1_se0";
drive-strength = <2>;
- bias-pull-up;
+ bias-pull-up = <2200>;
};
qup_i2c1_data_clk: qup-i2c1-data-clk-state {
@@ -2699,7 +2700,7 @@ qup_i2c1_data_clk: qup-i2c1-data-clk-state {
pins = "gpio32", "gpio33";
function = "qup1_se1";
drive-strength = <2>;
- bias-pull-up;
+ bias-pull-up = <2200>;
};
qup_i2c2_data_clk: qup-i2c2-data-clk-state {
@@ -2707,7 +2708,7 @@ qup_i2c2_data_clk: qup-i2c2-data-clk-state {
pins = "gpio36", "gpio37";
function = "qup1_se2";
drive-strength = <2>;
- bias-pull-up;
+ bias-pull-up = <2200>;
};
qup_i2c3_data_clk: qup-i2c3-data-clk-state {
@@ -2715,7 +2716,7 @@ qup_i2c3_data_clk: qup-i2c3-data-clk-state {
pins = "gpio40", "gpio41";
function = "qup1_se3";
drive-strength = <2>;
- bias-pull-up;
+ bias-pull-up = <2200>;
};
qup_i2c4_data_clk: qup-i2c4-data-clk-state {
@@ -2723,7 +2724,7 @@ qup_i2c4_data_clk: qup-i2c4-data-clk-state {
pins = "gpio44", "gpio45";
function = "qup1_se4";
drive-strength = <2>;
- bias-pull-up;
+ bias-pull-up = <2200>;
};
qup_i2c5_data_clk: qup-i2c5-data-clk-state {
@@ -2731,7 +2732,7 @@ qup_i2c5_data_clk: qup-i2c5-data-clk-state {
pins = "gpio52", "gpio53";
function = "qup1_se5";
drive-strength = <2>;
- bias-pull-up;
+ bias-pull-up = <2200>;
};
qup_i2c6_data_clk: qup-i2c6-data-clk-state {
@@ -2739,7 +2740,7 @@ qup_i2c6_data_clk: qup-i2c6-data-clk-state {
pins = "gpio48", "gpio49";
function = "qup1_se6";
drive-strength = <2>;
- bias-pull-up;
+ bias-pull-up = <2200>;
};
qup_i2c8_data_clk: qup-i2c8-data-clk-state {
@@ -2747,14 +2748,14 @@ scl-pins {
pins = "gpio57";
function = "qup2_se0_l1_mira";
drive-strength = <2>;
- bias-pull-up;
+ bias-pull-up = <2200>;
};
sda-pins {
pins = "gpio56";
function = "qup2_se0_l0_mira";
drive-strength = <2>;
- bias-pull-up;
+ bias-pull-up = <2200>;
};
};
@@ -2763,7 +2764,7 @@ qup_i2c9_data_clk: qup-i2c9-data-clk-state {
pins = "gpio60", "gpio61";
function = "qup2_se1";
drive-strength = <2>;
- bias-pull-up;
+ bias-pull-up = <2200>;
};
qup_i2c10_data_clk: qup-i2c10-data-clk-state {
@@ -2771,7 +2772,7 @@ qup_i2c10_data_clk: qup-i2c10-data-clk-state {
pins = "gpio64", "gpio65";
function = "qup2_se2";
drive-strength = <2>;
- bias-pull-up;
+ bias-pull-up = <2200>;
};
qup_i2c11_data_clk: qup-i2c11-data-clk-state {
@@ -2779,7 +2780,7 @@ qup_i2c11_data_clk: qup-i2c11-data-clk-state {
pins = "gpio68", "gpio69";
function = "qup2_se3";
drive-strength = <2>;
- bias-pull-up;
+ bias-pull-up = <2200>;
};
qup_i2c12_data_clk: qup-i2c12-data-clk-state {
@@ -2787,7 +2788,7 @@ qup_i2c12_data_clk: qup-i2c12-data-clk-state {
pins = "gpio2", "gpio3";
function = "qup2_se4";
drive-strength = <2>;
- bias-pull-up;
+ bias-pull-up = <2200>;
};
qup_i2c13_data_clk: qup-i2c13-data-clk-state {
@@ -2795,7 +2796,7 @@ qup_i2c13_data_clk: qup-i2c13-data-clk-state {
pins = "gpio80", "gpio81";
function = "qup2_se5";
drive-strength = <2>;
- bias-pull-up;
+ bias-pull-up = <2200>;
};
qup_i2c15_data_clk: qup-i2c15-data-clk-state {
@@ -2803,7 +2804,7 @@ qup_i2c15_data_clk: qup-i2c15-data-clk-state {
pins = "gpio72", "gpio106";
function = "qup2_se7";
drive-strength = <2>;
- bias-pull-up;
+ bias-pull-up = <2200>;
};
qup_spi0_cs: qup-spi0-cs-state {
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 6f7b218..b9e3661 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -16,6 +16,7 @@
#include <linux/types.h>
#include <linux/jump_label.h>
#include <linux/kvm_types.h>
+#include <linux/maple_tree.h>
#include <linux/percpu.h>
#include <linux/psci.h>
#include <asm/arch_gicv3.h>
@@ -199,6 +200,9 @@ struct kvm_arch {
/* Mandated version of PSCI */
u32 psci_version;
+ /* Protects VM-scoped configuration data */
+ struct mutex config_lock;
+
/*
* If we encounter a data abort without valid instruction syndrome
* information, report this to user space. User space can (and
@@ -221,7 +225,12 @@ struct kvm_arch {
#define KVM_ARCH_FLAG_EL1_32BIT 4
/* PSCI SYSTEM_SUSPEND enabled for the guest */
#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5
-
+ /* VM counter offset */
+#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 6
+ /* Timer PPIs made immutable */
+#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 7
+ /* SMCCC filter initialized for the VM */
+#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 8
unsigned long flags;
/*
@@ -242,6 +251,7 @@ struct kvm_arch {
/* Hypercall features firmware registers' descriptor */
struct kvm_smccc_features smccc_feat;
+ struct maple_tree smccc_filter;
/*
* For an untrusted host VM, 'pkvm.handle' is used to lookup
@@ -365,6 +375,10 @@ enum vcpu_sysreg {
TPIDR_EL2, /* EL2 Software Thread ID Register */
CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
SP_EL2, /* EL2 Stack Pointer */
+ CNTHP_CTL_EL2,
+ CNTHP_CVAL_EL2,
+ CNTHV_CTL_EL2,
+ CNTHV_CVAL_EL2,
NR_SYS_REGS /* Nothing after this line! */
};
@@ -522,6 +536,7 @@ struct kvm_vcpu_arch {
/* vcpu power state */
struct kvm_mp_state mp_state;
+ spinlock_t mp_state_lock;
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
@@ -922,6 +937,9 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
int __init kvm_sys_reg_table_init(void);
+bool lock_all_vcpus(struct kvm *kvm);
+void unlock_all_vcpus(struct kvm *kvm);
+
/* MMIO helpers */
void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
@@ -1007,6 +1025,8 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
struct kvm_arm_copy_mte_tags *copy_tags);
+int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
+ struct kvm_arm_counter_offset *offset);
/* Guest/host FPSIMD coordination helpers */
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
@@ -1061,6 +1081,9 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
(system_supports_32bit_el0() && \
!static_branch_unlikely(&arm64_mismatched_32bit_el0))
+#define kvm_vm_has_ran_once(kvm) \
+ (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &(kvm)->arch.flags))
+
int kvm_trng_call(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM
extern phys_addr_t hyp_mem_base;
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 083cc47..27e63c1 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -63,6 +63,7 @@
* specific registers encoded in the instructions).
*/
.macro kern_hyp_va reg
+#ifndef __KVM_VHE_HYPERVISOR__
alternative_cb ARM64_ALWAYS_SYSTEM, kvm_update_va_mask
and \reg, \reg, #1 /* mask with va_mask */
ror \reg, \reg, #1 /* rotate to the first tag bit */
@@ -70,6 +71,7 @@ alternative_cb ARM64_ALWAYS_SYSTEM, kvm_update_va_mask
add \reg, \reg, #0, lsl 12 /* insert the top 12 bits of the tag */
ror \reg, \reg, #63 /* rotate back */
alternative_cb_end
+#endif
.endm
/*
@@ -127,6 +129,7 @@ void kvm_apply_hyp_relocations(void);
static __always_inline unsigned long __kern_hyp_va(unsigned long v)
{
+#ifndef __KVM_VHE_HYPERVISOR__
asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n"
"ror %0, %0, #1\n"
"add %0, %0, #0\n"
@@ -135,6 +138,7 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
ARM64_ALWAYS_SYSTEM,
kvm_update_va_mask)
: "+r" (v));
+#endif
return v;
}
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 9e3ecba..a43f215 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -388,6 +388,7 @@
#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0)
+#define SYS_CNTPCT_EL0 sys_reg(3, 3, 14, 0, 1)
#define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5)
#define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6)
@@ -400,7 +401,9 @@
#define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0)
#define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1)
+#define SYS_AARCH32_CNTPCT sys_reg(0, 0, 0, 14, 0)
#define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0)
+#define SYS_AARCH32_CNTPCTSS sys_reg(0, 8, 0, 14, 0)
#define __PMEV_op2(n) ((n) & 0x7)
#define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3))
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index f8129c6..f7ddd73 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -198,6 +198,15 @@ struct kvm_arm_copy_mte_tags {
__u64 reserved[2];
};
+/*
+ * Counter/Timer offset structure. Describes the virtual/physical offset.
+ * To be used with KVM_ARM_SET_COUNTER_OFFSET.
+ */
+struct kvm_arm_counter_offset {
+ __u64 counter_offset;
+ __u64 reserved;
+};
+
#define KVM_ARM_TAGS_TO_GUEST 0
#define KVM_ARM_TAGS_FROM_GUEST 1
@@ -372,6 +381,10 @@ enum {
#endif
};
+/* Device Control API on vm fd */
+#define KVM_ARM_VM_SMCCC_CTRL 0
+#define KVM_ARM_VM_SMCCC_FILTER 0
+
/* Device Control API: ARM VGIC */
#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
@@ -411,6 +424,8 @@ enum {
#define KVM_ARM_VCPU_TIMER_CTRL 1
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
+#define KVM_ARM_VCPU_TIMER_IRQ_HVTIMER 2
+#define KVM_ARM_VCPU_TIMER_IRQ_HPTIMER 3
#define KVM_ARM_VCPU_PVTIME_CTRL 2
#define KVM_ARM_VCPU_PVTIME_IPA 0
@@ -469,6 +484,27 @@ enum {
/* run->fail_entry.hardware_entry_failure_reason codes. */
#define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0)
+enum kvm_smccc_filter_action {
+ KVM_SMCCC_FILTER_HANDLE = 0,
+ KVM_SMCCC_FILTER_DENY,
+ KVM_SMCCC_FILTER_FWD_TO_USER,
+
+#ifdef __KERNEL__
+ NR_SMCCC_FILTER_ACTIONS
+#endif
+};
+
+struct kvm_smccc_filter {
+ __u32 base;
+ __u32 nr_functions;
+ __u8 action;
+ __u8 pad[15];
+};
+
+/* arm64-specific KVM_EXIT_HYPERCALL flags */
+#define KVM_HYPERCALL_EXIT_SMC (1U << 0)
+#define KVM_HYPERCALL_EXIT_16BIT (1U << 1)
+
#endif
#endif /* __ARM_KVM_H__ */
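The UAPI additions above are exercised from userspace with ordinary VM ioctls: KVM_ARM_SET_COUNTER_OFFSET takes a struct kvm_arm_counter_offset directly, while the SMCCC filter is programmed through KVM_SET_DEVICE_ATTR with the KVM_ARM_VM_SMCCC_CTRL group. A minimal VMM-side sketch, assuming a VM fd is already open and KVM_CAP_COUNTER_OFFSET has been checked; the SMCCC range passed in is purely illustrative:

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Shift both the virtual and physical counter views of the guest. */
	static int set_counter_offset(int vm_fd, __u64 offset)
	{
		struct kvm_arm_counter_offset off = {
			.counter_offset	= offset,
			.reserved	= 0,		/* must be zero */
		};

		return ioctl(vm_fd, KVM_ARM_SET_COUNTER_OFFSET, &off);
	}

	/* Deny a (hypothetical) range of SMCCC function IDs for this VM. */
	static int deny_smccc_range(int vm_fd, __u32 base, __u32 nr_functions)
	{
		struct kvm_smccc_filter filter = {
			.base		= base,
			.nr_functions	= nr_functions,
			.action		= KVM_SMCCC_FILTER_DENY,
		};
		struct kvm_device_attr attr = {
			.group	= KVM_ARM_VM_SMCCC_CTRL,
			.attr	= KVM_ARM_VM_SMCCC_FILTER,
			.addr	= (__u64)(unsigned long)&filter,
		};

		return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
	}

Setting the offset only succeeds while no vCPU ioctl is in flight (the kernel side must win every vcpu mutex via lock_all_vcpus()); otherwise the ioctl returns -EBUSY.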
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 2e3e551..c331c49 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2223,6 +2223,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.sign = FTR_UNSIGNED,
.min_field_value = 1,
},
+ {
+ .desc = "Enhanced Counter Virtualization (CNTPOFF)",
+ .capability = ARM64_HAS_ECV_CNTPOFF,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64MMFR0_EL1,
+ .field_pos = ID_AA64MMFR0_EL1_ECV_SHIFT,
+ .field_width = 4,
+ .sign = FTR_UNSIGNED,
+ .min_field_value = ID_AA64MMFR0_EL1_ECV_CNTPOFF,
+ },
#ifdef CONFIG_ARM64_PAN
{
.desc = "Privileged Access Never",
diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S
index 28d8a5d..d731b46 100644
--- a/arch/arm64/kernel/efi-header.S
+++ b/arch/arm64/kernel/efi-header.S
@@ -66,7 +66,7 @@
.long .Lefi_header_end - .L_head // SizeOfHeaders
.long 0 // CheckSum
.short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem
- .short 0 // DllCharacteristics
+ .short IMAGE_DLL_CHARACTERISTICS_NX_COMPAT // DllCharacteristics
.quad 0 // SizeOfStackReserve
.quad 0 // SizeOfStackCommit
.quad 0 // SizeOfHeapReserve
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index e1af430..05b022b 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -16,6 +16,7 @@
#include <asm/arch_timer.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_nested.h>
#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>
@@ -30,14 +31,11 @@ static u32 host_ptimer_irq_flags;
static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
-static const struct kvm_irq_level default_ptimer_irq = {
- .irq = 30,
- .level = 1,
-};
-
-static const struct kvm_irq_level default_vtimer_irq = {
- .irq = 27,
- .level = 1,
+static const u8 default_ppi[] = {
+ [TIMER_PTIMER] = 30,
+ [TIMER_VTIMER] = 27,
+ [TIMER_HPTIMER] = 26,
+ [TIMER_HVTIMER] = 28,
};
static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
@@ -51,6 +49,24 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
struct arch_timer_context *timer,
enum kvm_arch_timer_regs treg);
+static bool kvm_arch_timer_get_input_level(int vintid);
+
+static struct irq_ops arch_timer_irq_ops = {
+ .get_input_level = kvm_arch_timer_get_input_level,
+};
+
+static bool has_cntpoff(void)
+{
+ return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
+}
+
+static int nr_timers(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu_has_nv(vcpu))
+ return NR_KVM_EL0_TIMERS;
+
+ return NR_KVM_TIMERS;
+}
u32 timer_get_ctl(struct arch_timer_context *ctxt)
{
@@ -61,6 +77,10 @@ u32 timer_get_ctl(struct arch_timer_context *ctxt)
return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
case TIMER_PTIMER:
return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
+ case TIMER_HVTIMER:
+ return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2);
+ case TIMER_HPTIMER:
+ return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2);
default:
WARN_ON(1);
return 0;
@@ -76,6 +96,10 @@ u64 timer_get_cval(struct arch_timer_context *ctxt)
return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
case TIMER_PTIMER:
return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
+ case TIMER_HVTIMER:
+ return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2);
+ case TIMER_HPTIMER:
+ return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
default:
WARN_ON(1);
return 0;
@@ -84,10 +108,17 @@ u64 timer_get_cval(struct arch_timer_context *ctxt)
static u64 timer_get_offset(struct arch_timer_context *ctxt)
{
- if (ctxt->offset.vm_offset)
- return *ctxt->offset.vm_offset;
+ u64 offset = 0;
- return 0;
+ if (!ctxt)
+ return 0;
+
+ if (ctxt->offset.vm_offset)
+ offset += *ctxt->offset.vm_offset;
+ if (ctxt->offset.vcpu_offset)
+ offset += *ctxt->offset.vcpu_offset;
+
+ return offset;
}
static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
@@ -101,6 +132,12 @@ static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
case TIMER_PTIMER:
__vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl;
break;
+ case TIMER_HVTIMER:
+ __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl;
+ break;
+ case TIMER_HPTIMER:
+ __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl;
+ break;
default:
WARN_ON(1);
}
@@ -117,6 +154,12 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
case TIMER_PTIMER:
__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval;
break;
+ case TIMER_HVTIMER:
+ __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval;
+ break;
+ case TIMER_HPTIMER:
+ __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval;
+ break;
default:
WARN_ON(1);
}
@@ -139,13 +182,27 @@ u64 kvm_phys_timer_read(void)
static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
{
- if (has_vhe()) {
+ if (vcpu_has_nv(vcpu)) {
+ if (is_hyp_ctxt(vcpu)) {
+ map->direct_vtimer = vcpu_hvtimer(vcpu);
+ map->direct_ptimer = vcpu_hptimer(vcpu);
+ map->emul_vtimer = vcpu_vtimer(vcpu);
+ map->emul_ptimer = vcpu_ptimer(vcpu);
+ } else {
+ map->direct_vtimer = vcpu_vtimer(vcpu);
+ map->direct_ptimer = vcpu_ptimer(vcpu);
+ map->emul_vtimer = vcpu_hvtimer(vcpu);
+ map->emul_ptimer = vcpu_hptimer(vcpu);
+ }
+ } else if (has_vhe()) {
map->direct_vtimer = vcpu_vtimer(vcpu);
map->direct_ptimer = vcpu_ptimer(vcpu);
+ map->emul_vtimer = NULL;
map->emul_ptimer = NULL;
} else {
map->direct_vtimer = vcpu_vtimer(vcpu);
map->direct_ptimer = NULL;
+ map->emul_vtimer = NULL;
map->emul_ptimer = vcpu_ptimer(vcpu);
}
@@ -212,7 +269,7 @@ static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx,
ns = cyclecounter_cyc2ns(timecounter->cc,
val - now,
timecounter->mask,
- &timecounter->frac);
+ &timer_ctx->ns_frac);
return ns;
}
@@ -240,8 +297,11 @@ static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
{
- struct arch_timer_context *ctx = vcpu_vtimer(vcpu);
u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
+ struct arch_timer_context *ctx;
+
+ ctx = (vcpu_has_nv(vcpu) && is_hyp_ctxt(vcpu)) ? vcpu_hvtimer(vcpu)
+ : vcpu_vtimer(vcpu);
return kvm_counter_compute_delta(ctx, val);
}
@@ -255,7 +315,7 @@ static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
u64 min_delta = ULLONG_MAX;
int i;
- for (i = 0; i < NR_KVM_TIMERS; i++) {
+ for (i = 0; i < nr_timers(vcpu); i++) {
struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];
WARN(ctx->loaded, "timer %d loaded\n", i);
@@ -338,9 +398,11 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
switch (index) {
case TIMER_VTIMER:
+ case TIMER_HVTIMER:
cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
break;
case TIMER_PTIMER:
+ case TIMER_HPTIMER:
cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
break;
case NR_KVM_TIMERS:
@@ -392,12 +454,12 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
int ret;
timer_ctx->irq.level = new_level;
- trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
+ trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
timer_ctx->irq.level);
if (!userspace_irqchip(vcpu->kvm)) {
ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
- timer_ctx->irq.irq,
+ timer_irq(timer_ctx),
timer_ctx->irq.level,
timer_ctx);
WARN_ON(ret);
@@ -432,6 +494,12 @@ static void set_cntvoff(u64 cntvoff)
kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
}
+static void set_cntpoff(u64 cntpoff)
+{
+ if (has_cntpoff())
+ write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
+}
+
static void timer_save_state(struct arch_timer_context *ctx)
{
struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
@@ -447,7 +515,10 @@ static void timer_save_state(struct arch_timer_context *ctx)
goto out;
switch (index) {
+ u64 cval;
+
case TIMER_VTIMER:
+ case TIMER_HVTIMER:
timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL));
@@ -473,13 +544,20 @@ static void timer_save_state(struct arch_timer_context *ctx)
set_cntvoff(0);
break;
case TIMER_PTIMER:
+ case TIMER_HPTIMER:
timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
- timer_set_cval(ctx, read_sysreg_el0(SYS_CNTP_CVAL));
+ cval = read_sysreg_el0(SYS_CNTP_CVAL);
+
+ if (!has_cntpoff())
+ cval -= timer_get_offset(ctx);
+
+ timer_set_cval(ctx, cval);
/* Disable the timer */
write_sysreg_el0(0, SYS_CNTP_CTL);
isb();
+ set_cntpoff(0);
break;
case NR_KVM_TIMERS:
BUG();
@@ -510,6 +588,7 @@ static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
*/
if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
!kvm_timer_irq_can_fire(map.direct_ptimer) &&
+ !kvm_timer_irq_can_fire(map.emul_vtimer) &&
!kvm_timer_irq_can_fire(map.emul_ptimer) &&
!vcpu_has_wfit_active(vcpu))
return;
@@ -543,14 +622,23 @@ static void timer_restore_state(struct arch_timer_context *ctx)
goto out;
switch (index) {
+ u64 cval, offset;
+
case TIMER_VTIMER:
+ case TIMER_HVTIMER:
set_cntvoff(timer_get_offset(ctx));
write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL);
isb();
write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
break;
case TIMER_PTIMER:
- write_sysreg_el0(timer_get_cval(ctx), SYS_CNTP_CVAL);
+ case TIMER_HPTIMER:
+ cval = timer_get_cval(ctx);
+ offset = timer_get_offset(ctx);
+ set_cntpoff(offset);
+ if (!has_cntpoff())
+ cval += offset;
+ write_sysreg_el0(cval, SYS_CNTP_CVAL);
isb();
write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
break;
@@ -586,7 +674,7 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx);
if (irqchip_in_kernel(vcpu->kvm))
- phys_active = kvm_vgic_map_is_active(vcpu, ctx->irq.irq);
+ phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));
phys_active |= ctx->irq.level;
@@ -621,6 +709,128 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}
+/* If _pred is true, set bit in _set, otherwise set it in _clr */
+#define assign_clear_set_bit(_pred, _bit, _clr, _set) \
+ do { \
+ if (_pred) \
+ (_set) |= (_bit); \
+ else \
+ (_clr) |= (_bit); \
+ } while (0)
+
+static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
+ struct timer_map *map)
+{
+ int hw, ret;
+
+ if (!irqchip_in_kernel(vcpu->kvm))
+ return;
+
+ /*
+ * We only ever unmap the vtimer irq on a VHE system that runs nested
+	 * virtualization, in which case we have a valid emul_vtimer,
+ * emul_ptimer, direct_vtimer, and direct_ptimer.
+ *
+ * Since this is called from kvm_timer_vcpu_load(), a change between
+ * vEL2 and vEL1/0 will have just happened, and the timer_map will
+	 * represent this, so we switch the emul/direct mappings
+ * below.
+ */
+ hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer));
+ if (hw < 0) {
+ kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer));
+ kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer));
+
+ ret = kvm_vgic_map_phys_irq(vcpu,
+ map->direct_vtimer->host_timer_irq,
+ timer_irq(map->direct_vtimer),
+ &arch_timer_irq_ops);
+ WARN_ON_ONCE(ret);
+ ret = kvm_vgic_map_phys_irq(vcpu,
+ map->direct_ptimer->host_timer_irq,
+ timer_irq(map->direct_ptimer),
+ &arch_timer_irq_ops);
+ WARN_ON_ONCE(ret);
+
+ /*
+	 * The virtual offset behaviour is "interesting", as it
+ * always applies when HCR_EL2.E2H==0, but only when
+ * accessed from EL1 when HCR_EL2.E2H==1. So make sure we
+ * track E2H when putting the HV timer in "direct" mode.
+ */
+ if (map->direct_vtimer == vcpu_hvtimer(vcpu)) {
+ struct arch_timer_offset *offs = &map->direct_vtimer->offset;
+
+ if (vcpu_el2_e2h_is_set(vcpu))
+ offs->vcpu_offset = NULL;
+ else
+ offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
+ }
+ }
+}
+
+static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
+{
+ bool tpt, tpc;
+ u64 clr, set;
+
+ /*
+ * No trapping gets configured here with nVHE. See
+ * __timer_enable_traps(), which is where the stuff happens.
+ */
+ if (!has_vhe())
+ return;
+
+ /*
+ * Our default policy is not to trap anything. As we progress
+ * within this function, reality kicks in and we start adding
+ * traps based on emulation requirements.
+ */
+ tpt = tpc = false;
+
+ /*
+	 * We have two possibilities to deal with a physical offset:
+ *
+ * - Either we have CNTPOFF (yay!) or the offset is 0:
+ * we let the guest freely access the HW
+ *
+	 * - or neither of these conditions applies:
+ * we trap accesses to the HW, but still use it
+ * after correcting the physical offset
+ */
+ if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
+ tpt = tpc = true;
+
+ /*
+ * Apply the enable bits that the guest hypervisor has requested for
+ * its own guest. We can only add traps that wouldn't have been set
+ * above.
+ */
+ if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
+
+ /* Use the VHE format for mental sanity */
+ if (!vcpu_el2_e2h_is_set(vcpu))
+ val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;
+
+ tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
+ tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));
+ }
+
+ /*
+ * Now that we have collected our requirements, compute the
+ * trap and enable bits.
+ */
+ set = 0;
+ clr = 0;
+
+ assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
+ assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
+
+ /* This only happens on VHE, so use the CNTKCTL_EL1 accessor */
+ sysreg_clear_set(cntkctl_el1, clr, set);
+}
+
void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
@@ -632,6 +842,9 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
get_timer_map(vcpu, &map);
if (static_branch_likely(&has_gic_active_state)) {
+ if (vcpu_has_nv(vcpu))
+ kvm_timer_vcpu_load_nested_switch(vcpu, &map);
+
kvm_timer_vcpu_load_gic(map.direct_vtimer);
if (map.direct_ptimer)
kvm_timer_vcpu_load_gic(map.direct_ptimer);
@@ -644,9 +857,12 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
timer_restore_state(map.direct_vtimer);
if (map.direct_ptimer)
timer_restore_state(map.direct_ptimer);
-
+ if (map.emul_vtimer)
+ timer_emulate(map.emul_vtimer);
if (map.emul_ptimer)
timer_emulate(map.emul_ptimer);
+
+ timer_set_traps(vcpu, &map);
}
bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
@@ -689,6 +905,8 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
* In any case, we re-schedule the hrtimer for the physical timer when
* coming back to the VCPU thread in kvm_timer_vcpu_load().
*/
+ if (map.emul_vtimer)
+ soft_timer_cancel(&map.emul_vtimer->hrtimer);
if (map.emul_ptimer)
soft_timer_cancel(&map.emul_ptimer->hrtimer);
@@ -738,56 +956,89 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
* resets the timer to be disabled and unmasked and is compliant with
* the ARMv7 architecture.
*/
- timer_set_ctl(vcpu_vtimer(vcpu), 0);
- timer_set_ctl(vcpu_ptimer(vcpu), 0);
+ for (int i = 0; i < nr_timers(vcpu); i++)
+ timer_set_ctl(vcpu_get_timer(vcpu, i), 0);
+
+ /*
+ * A vcpu running at EL2 is in charge of the offset applied to
+ * the virtual timer, so use the physical VM offset, and point
+ * the vcpu offset to CNTVOFF_EL2.
+ */
+ if (vcpu_has_nv(vcpu)) {
+ struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset;
+
+ offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
+ offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset;
+ }
if (timer->enabled) {
- kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu));
- kvm_timer_update_irq(vcpu, false, vcpu_ptimer(vcpu));
+ for (int i = 0; i < nr_timers(vcpu); i++)
+ kvm_timer_update_irq(vcpu, false,
+ vcpu_get_timer(vcpu, i));
if (irqchip_in_kernel(vcpu->kvm)) {
- kvm_vgic_reset_mapped_irq(vcpu, map.direct_vtimer->irq.irq);
+ kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer));
if (map.direct_ptimer)
- kvm_vgic_reset_mapped_irq(vcpu, map.direct_ptimer->irq.irq);
+ kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer));
}
}
+ if (map.emul_vtimer)
+ soft_timer_cancel(&map.emul_vtimer->hrtimer);
if (map.emul_ptimer)
soft_timer_cancel(&map.emul_ptimer->hrtimer);
return 0;
}
+static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
+{
+ struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
+ struct kvm *kvm = vcpu->kvm;
+
+ ctxt->vcpu = vcpu;
+
+ if (timerid == TIMER_VTIMER)
+ ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
+ else
+ ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;
+
+ hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
+ ctxt->hrtimer.function = kvm_hrtimer_expire;
+
+ switch (timerid) {
+ case TIMER_PTIMER:
+ case TIMER_HPTIMER:
+ ctxt->host_timer_irq = host_ptimer_irq;
+ break;
+ case TIMER_VTIMER:
+ case TIMER_HVTIMER:
+ ctxt->host_timer_irq = host_vtimer_irq;
+ break;
+ }
+}
+
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
- struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
- struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
- vtimer->vcpu = vcpu;
- vtimer->offset.vm_offset = &vcpu->kvm->arch.timer_data.voffset;
- ptimer->vcpu = vcpu;
+ for (int i = 0; i < NR_KVM_TIMERS; i++)
+ timer_context_init(vcpu, i);
- /* Synchronize cntvoff across all vtimers of a VM. */
- timer_set_offset(vtimer, kvm_phys_timer_read());
- timer_set_offset(ptimer, 0);
+ /* Synchronize offsets across timers of a VM if not already provided */
+ if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
+ timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read());
+ timer_set_offset(vcpu_ptimer(vcpu), 0);
+ }
hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
timer->bg_timer.function = kvm_bg_timer_expire;
+}
- hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
- hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
- vtimer->hrtimer.function = kvm_hrtimer_expire;
- ptimer->hrtimer.function = kvm_hrtimer_expire;
-
- vtimer->irq.irq = default_vtimer_irq.irq;
- ptimer->irq.irq = default_ptimer_irq.irq;
-
- vtimer->host_timer_irq = host_vtimer_irq;
- ptimer->host_timer_irq = host_ptimer_irq;
-
- vtimer->host_timer_irq_flags = host_vtimer_irq_flags;
- ptimer->host_timer_irq_flags = host_ptimer_irq_flags;
+void kvm_timer_init_vm(struct kvm *kvm)
+{
+ for (int i = 0; i < NR_KVM_TIMERS; i++)
+ kvm->arch.timer_data.ppi[i] = default_ppi[i];
}
void kvm_timer_cpu_up(void)
@@ -814,8 +1065,11 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
break;
case KVM_REG_ARM_TIMER_CNT:
- timer = vcpu_vtimer(vcpu);
- timer_set_offset(timer, kvm_phys_timer_read() - value);
+ if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
+ &vcpu->kvm->arch.flags)) {
+ timer = vcpu_vtimer(vcpu);
+ timer_set_offset(timer, kvm_phys_timer_read() - value);
+ }
break;
case KVM_REG_ARM_TIMER_CVAL:
timer = vcpu_vtimer(vcpu);
@@ -825,6 +1079,13 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
timer = vcpu_ptimer(vcpu);
kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
break;
+ case KVM_REG_ARM_PTIMER_CNT:
+ if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
+ &vcpu->kvm->arch.flags)) {
+ timer = vcpu_ptimer(vcpu);
+ timer_set_offset(timer, kvm_phys_timer_read() - value);
+ }
+ break;
case KVM_REG_ARM_PTIMER_CVAL:
timer = vcpu_ptimer(vcpu);
kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
@@ -902,6 +1163,10 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
val = kvm_phys_timer_read() - timer_get_offset(timer);
break;
+ case TIMER_REG_VOFF:
+ val = *timer->offset.vcpu_offset;
+ break;
+
default:
BUG();
}
@@ -920,7 +1185,7 @@ u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
get_timer_map(vcpu, &map);
timer = vcpu_get_timer(vcpu, tmr);
- if (timer == map.emul_ptimer)
+ if (timer == map.emul_vtimer || timer == map.emul_ptimer)
return kvm_arm_timer_read(vcpu, timer, treg);
preempt_disable();
@@ -952,6 +1217,10 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
timer_set_cval(timer, val);
break;
+ case TIMER_REG_VOFF:
+ *timer->offset.vcpu_offset = val;
+ break;
+
default:
BUG();
}
@@ -967,7 +1236,7 @@ void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
get_timer_map(vcpu, &map);
timer = vcpu_get_timer(vcpu, tmr);
- if (timer == map.emul_ptimer) {
+ if (timer == map.emul_vtimer || timer == map.emul_ptimer) {
soft_timer_cancel(&timer->hrtimer);
kvm_arm_timer_write(vcpu, timer, treg, val);
timer_emulate(timer);
@@ -1047,10 +1316,6 @@ static const struct irq_domain_ops timer_domain_ops = {
.free = timer_irq_domain_free,
};
-static struct irq_ops arch_timer_irq_ops = {
- .get_input_level = kvm_arch_timer_get_input_level,
-};
-
static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
{
*flags = irq_get_trigger_type(virq);
@@ -1192,44 +1457,56 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
{
- int vtimer_irq, ptimer_irq, ret;
- unsigned long i;
+ u32 ppis = 0;
+ bool valid;
- vtimer_irq = vcpu_vtimer(vcpu)->irq.irq;
- ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu));
- if (ret)
- return false;
+ mutex_lock(&vcpu->kvm->arch.config_lock);
- ptimer_irq = vcpu_ptimer(vcpu)->irq.irq;
- ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu));
- if (ret)
- return false;
+ for (int i = 0; i < nr_timers(vcpu); i++) {
+ struct arch_timer_context *ctx;
+ int irq;
- kvm_for_each_vcpu(i, vcpu, vcpu->kvm) {
- if (vcpu_vtimer(vcpu)->irq.irq != vtimer_irq ||
- vcpu_ptimer(vcpu)->irq.irq != ptimer_irq)
- return false;
+ ctx = vcpu_get_timer(vcpu, i);
+ irq = timer_irq(ctx);
+ if (kvm_vgic_set_owner(vcpu, irq, ctx))
+ break;
+
+ /*
+ * We know by construction that we only have PPIs, so
+ * all values are less than 32.
+ */
+ ppis |= BIT(irq);
}
- return true;
+ valid = hweight32(ppis) == nr_timers(vcpu);
+
+ if (valid)
+ set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags);
+
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
+
+ return valid;
}
-bool kvm_arch_timer_get_input_level(int vintid)
+static bool kvm_arch_timer_get_input_level(int vintid)
{
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
- struct arch_timer_context *timer;
if (WARN(!vcpu, "No vcpu context!\n"))
return false;
- if (vintid == vcpu_vtimer(vcpu)->irq.irq)
- timer = vcpu_vtimer(vcpu);
- else if (vintid == vcpu_ptimer(vcpu)->irq.irq)
- timer = vcpu_ptimer(vcpu);
- else
- BUG();
+ for (int i = 0; i < nr_timers(vcpu); i++) {
+ struct arch_timer_context *ctx;
- return kvm_timer_should_fire(timer);
+ ctx = vcpu_get_timer(vcpu, i);
+ if (timer_irq(ctx) == vintid)
+ return kvm_timer_should_fire(ctx);
+ }
+
+ /* A timer IRQ has fired, but no matching timer was found? */
+ WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid);
+
+ return false;
}
int kvm_timer_enable(struct kvm_vcpu *vcpu)
@@ -1258,7 +1535,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
ret = kvm_vgic_map_phys_irq(vcpu,
map.direct_vtimer->host_timer_irq,
- map.direct_vtimer->irq.irq,
+ timer_irq(map.direct_vtimer),
&arch_timer_irq_ops);
if (ret)
return ret;
@@ -1266,7 +1543,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
if (map.direct_ptimer) {
ret = kvm_vgic_map_phys_irq(vcpu,
map.direct_ptimer->host_timer_irq,
- map.direct_ptimer->irq.irq,
+ timer_irq(map.direct_ptimer),
&arch_timer_irq_ops);
}
@@ -1278,45 +1555,17 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
return 0;
}
-/*
- * On VHE system, we only need to configure the EL2 timer trap register once,
- * not for every world switch.
- * The host kernel runs at EL2 with HCR_EL2.TGE == 1,
- * and this makes those bits have no effect for the host kernel execution.
- */
+/* If we have CNTPOFF, permanently set ECV to enable it */
void kvm_timer_init_vhe(void)
{
- /* When HCR_EL2.E2H ==1, EL1PCEN and EL1PCTEN are shifted by 10 */
- u32 cnthctl_shift = 10;
- u64 val;
-
- /*
- * VHE systems allow the guest direct access to the EL1 physical
- * timer/counter.
- */
- val = read_sysreg(cnthctl_el2);
- val |= (CNTHCTL_EL1PCEN << cnthctl_shift);
- val |= (CNTHCTL_EL1PCTEN << cnthctl_shift);
- write_sysreg(val, cnthctl_el2);
-}
-
-static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq)
-{
- struct kvm_vcpu *vcpu;
- unsigned long i;
-
- kvm_for_each_vcpu(i, vcpu, kvm) {
- vcpu_vtimer(vcpu)->irq.irq = vtimer_irq;
- vcpu_ptimer(vcpu)->irq.irq = ptimer_irq;
- }
+ if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
+ sysreg_clear_set(cntkctl_el1, 0, CNTHCTL_ECV);
}
int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
int __user *uaddr = (int __user *)(long)attr->addr;
- struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
- struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
- int irq;
+ int irq, idx, ret = 0;
if (!irqchip_in_kernel(vcpu->kvm))
return -EINVAL;
@@ -1327,21 +1576,42 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
if (!(irq_is_ppi(irq)))
return -EINVAL;
- if (vcpu->arch.timer_cpu.enabled)
- return -EBUSY;
+ mutex_lock(&vcpu->kvm->arch.config_lock);
+
+ if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE,
+ &vcpu->kvm->arch.flags)) {
+ ret = -EBUSY;
+ goto out;
+ }
switch (attr->attr) {
case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
- set_timer_irqs(vcpu->kvm, irq, ptimer->irq.irq);
+ idx = TIMER_VTIMER;
break;
case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
- set_timer_irqs(vcpu->kvm, vtimer->irq.irq, irq);
+ idx = TIMER_PTIMER;
+ break;
+ case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
+ idx = TIMER_HVTIMER;
+ break;
+ case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
+ idx = TIMER_HPTIMER;
break;
default:
- return -ENXIO;
+ ret = -ENXIO;
+ goto out;
}
- return 0;
+ /*
+	 * We cannot validate the IRQ uniqueness before we run, so take it at
+	 * face value. The verdict will be given on first vcpu run, for each
+	 * vcpu. Yes, this is late. Blame it on the stupid API.
+ */
+ vcpu->kvm->arch.timer_data.ppi[idx] = irq;
+
+out:
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
+ return ret;
}
int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
@@ -1357,11 +1627,17 @@ int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
timer = vcpu_ptimer(vcpu);
break;
+ case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
+ timer = vcpu_hvtimer(vcpu);
+ break;
+ case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
+ timer = vcpu_hptimer(vcpu);
+ break;
default:
return -ENXIO;
}
- irq = timer->irq.irq;
+ irq = timer_irq(timer);
return put_user(irq, uaddr);
}
@@ -1370,8 +1646,42 @@ int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
switch (attr->attr) {
case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
+ case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
+ case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
return 0;
}
return -ENXIO;
}
+
+int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
+ struct kvm_arm_counter_offset *offset)
+{
+ int ret = 0;
+
+ if (offset->reserved)
+ return -EINVAL;
+
+ mutex_lock(&kvm->lock);
+
+ if (lock_all_vcpus(kvm)) {
+ set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags);
+
+ /*
+ * If userspace decides to set the offset using this
+ * API rather than merely restoring the counter
+ * values, the offset applies to both the virtual and
+ * physical views.
+ */
+ kvm->arch.timer_data.voffset = offset->counter_offset;
+ kvm->arch.timer_data.poffset = offset->counter_offset;
+
+ unlock_all_vcpus(kvm);
+ } else {
+ ret = -EBUSY;
+ }
+
+ mutex_unlock(&kvm->lock);
+
+ return ret;
+}
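As a reading aid (not kernel code), the arithmetic implemented by timer_get_offset() and the CVAL fix-up above reduces to: the guest observes the host counter minus the accumulated VM and vCPU offsets, and on CPUs without CNTPOFF that same offset must be folded back into the value programmed into the hardware comparator. A hedged sketch:

	/* Illustrative only: mirrors the offset handling in arch_timer.c. */
	static __u64 guest_counter(__u64 host_cnt, __u64 vm_off, __u64 vcpu_off)
	{
		/* What a trapped or offset counter read returns to the guest. */
		return host_cnt - (vm_off + vcpu_off);
	}

	static __u64 hw_cval_without_cntpoff(__u64 guest_cval, __u64 offset)
	{
		/*
		 * The guest programs CVAL against its own timeline, so when
		 * CNTPOFF is not available the value written to the real
		 * CNTP_CVAL_EL0 must be shifted back into the host timeline
		 * (and shifted out again on save).
		 */
		return guest_cval + offset;
	}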
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index a43e1cb..95b715c 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -128,6 +128,16 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
int ret;
+ mutex_init(&kvm->arch.config_lock);
+
+#ifdef CONFIG_LOCKDEP
+ /* Clue in lockdep that the config_lock must be taken inside kvm->lock */
+ mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
+ mutex_unlock(&kvm->arch.config_lock);
+ mutex_unlock(&kvm->lock);
+#endif
+
ret = kvm_share_hyp(kvm, kvm + 1);
if (ret)
return ret;
@@ -148,6 +158,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_vgic_early_init(kvm);
+ kvm_timer_init_vm(kvm);
+
/* The maximum number of VCPUs is limited by the host's GIC model */
kvm->max_vcpus = kvm_arm_default_max_vcpus();
@@ -192,6 +204,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_destroy_vcpus(kvm);
kvm_unshare_hyp(kvm, kvm + 1);
+
+ kvm_arm_teardown_hypercalls(kvm);
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -220,6 +234,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_VCPU_ATTRIBUTES:
case KVM_CAP_PTP_KVM:
case KVM_CAP_ARM_SYSTEM_SUSPEND:
+ case KVM_CAP_COUNTER_OFFSET:
r = 1;
break;
case KVM_CAP_SET_GUEST_DEBUG2:
@@ -326,6 +341,16 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
int err;
+ spin_lock_init(&vcpu->arch.mp_state_lock);
+
+#ifdef CONFIG_LOCKDEP
+ /* Inform lockdep that the config_lock is acquired after vcpu->mutex */
+ mutex_lock(&vcpu->mutex);
+ mutex_lock(&vcpu->kvm->arch.config_lock);
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
+ mutex_unlock(&vcpu->mutex);
+#endif
+
/* Force users to call KVM_ARM_VCPU_INIT */
vcpu->arch.target = -1;
bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
@@ -443,34 +468,41 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
vcpu->cpu = -1;
}
-void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
+static void __kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
{
- vcpu->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
+ WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
kvm_make_request(KVM_REQ_SLEEP, vcpu);
kvm_vcpu_kick(vcpu);
}
+void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
+{
+ spin_lock(&vcpu->arch.mp_state_lock);
+ __kvm_arm_vcpu_power_off(vcpu);
+ spin_unlock(&vcpu->arch.mp_state_lock);
+}
+
bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_STOPPED;
+ return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED;
}
static void kvm_arm_vcpu_suspend(struct kvm_vcpu *vcpu)
{
- vcpu->arch.mp_state.mp_state = KVM_MP_STATE_SUSPENDED;
+ WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_SUSPENDED);
kvm_make_request(KVM_REQ_SUSPEND, vcpu);
kvm_vcpu_kick(vcpu);
}
static bool kvm_arm_vcpu_suspended(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_SUSPENDED;
+ return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_SUSPENDED;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- *mp_state = vcpu->arch.mp_state;
+ *mp_state = READ_ONCE(vcpu->arch.mp_state);
return 0;
}
@@ -480,12 +512,14 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
{
int ret = 0;
+ spin_lock(&vcpu->arch.mp_state_lock);
+
switch (mp_state->mp_state) {
case KVM_MP_STATE_RUNNABLE:
- vcpu->arch.mp_state = *mp_state;
+ WRITE_ONCE(vcpu->arch.mp_state, *mp_state);
break;
case KVM_MP_STATE_STOPPED:
- kvm_arm_vcpu_power_off(vcpu);
+ __kvm_arm_vcpu_power_off(vcpu);
break;
case KVM_MP_STATE_SUSPENDED:
kvm_arm_vcpu_suspend(vcpu);
@@ -494,6 +528,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
ret = -EINVAL;
}
+ spin_unlock(&vcpu->arch.mp_state_lock);
+
return ret;
}
@@ -593,9 +629,9 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
if (kvm_vm_is_protected(kvm))
kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu);
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
set_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags);
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.config_lock);
return ret;
}
@@ -1210,10 +1246,14 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
/*
* Handle the "start in power-off" case.
*/
+ spin_lock(&vcpu->arch.mp_state_lock);
+
if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
- kvm_arm_vcpu_power_off(vcpu);
+ __kvm_arm_vcpu_power_off(vcpu);
else
- vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE;
+ WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
+
+ spin_unlock(&vcpu->arch.mp_state_lock);
return 0;
}
@@ -1439,10 +1479,31 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
}
}
+static int kvm_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_ARM_VM_SMCCC_CTRL:
+ return kvm_vm_smccc_has_attr(kvm, attr);
+ default:
+ return -ENXIO;
+ }
+}
+
+static int kvm_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_ARM_VM_SMCCC_CTRL:
+ return kvm_vm_smccc_set_attr(kvm, attr);
+ default:
+ return -ENXIO;
+ }
+}
+
int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
struct kvm *kvm = filp->private_data;
void __user *argp = (void __user *)arg;
+ struct kvm_device_attr attr;
switch (ioctl) {
case KVM_CREATE_IRQCHIP: {
@@ -1478,11 +1539,73 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
return -EFAULT;
		return kvm_vm_ioctl_mte_copy_tags(kvm, &copy_tags);
}
+ case KVM_ARM_SET_COUNTER_OFFSET: {
+ struct kvm_arm_counter_offset offset;
+
+ if (copy_from_user(&offset, argp, sizeof(offset)))
+ return -EFAULT;
+ return kvm_vm_ioctl_set_counter_offset(kvm, &offset);
+ }
+ case KVM_HAS_DEVICE_ATTR: {
+ if (copy_from_user(&attr, argp, sizeof(attr)))
+ return -EFAULT;
+
+ return kvm_vm_has_attr(kvm, &attr);
+ }
+ case KVM_SET_DEVICE_ATTR: {
+ if (copy_from_user(&attr, argp, sizeof(attr)))
+ return -EFAULT;
+
+ return kvm_vm_set_attr(kvm, &attr);
+ }
default:
return -EINVAL;
}
}
+/* unlocks vcpus from @vcpu_lock_idx and smaller */
+static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx)
+{
+ struct kvm_vcpu *tmp_vcpu;
+
+ for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
+ tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
+ mutex_unlock(&tmp_vcpu->mutex);
+ }
+}
+
+void unlock_all_vcpus(struct kvm *kvm)
+{
+ lockdep_assert_held(&kvm->lock);
+
+ unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1);
+}
+
+/* Returns true if all vcpus were locked, false otherwise */
+bool lock_all_vcpus(struct kvm *kvm)
+{
+ struct kvm_vcpu *tmp_vcpu;
+ unsigned long c;
+
+ lockdep_assert_held(&kvm->lock);
+
+ /*
+ * Any time a vcpu is in an ioctl (including running), the
+ * core KVM code tries to grab the vcpu->mutex.
+ *
+ * By grabbing the vcpu->mutex of all VCPUs we ensure that no
+ * other VCPUs can fiddle with the state while we access it.
+ */
+ kvm_for_each_vcpu(c, tmp_vcpu, kvm) {
+ if (!mutex_trylock(&tmp_vcpu->mutex)) {
+ unlock_vcpus(kvm, c - 1);
+ return false;
+ }
+ }
+
+ return true;
+}
+
static unsigned long nvhe_percpu_size(void)
{
return (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_end) -
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 26a2ebc..20280a5 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -590,11 +590,16 @@ static unsigned long num_core_regs(const struct kvm_vcpu *vcpu)
return copy_core_reg_indices(vcpu, NULL);
}
-/**
- * ARM64 versions of the TIMER registers, always available on arm64
- */
+static const u64 timer_reg_list[] = {
+ KVM_REG_ARM_TIMER_CTL,
+ KVM_REG_ARM_TIMER_CNT,
+ KVM_REG_ARM_TIMER_CVAL,
+ KVM_REG_ARM_PTIMER_CTL,
+ KVM_REG_ARM_PTIMER_CNT,
+ KVM_REG_ARM_PTIMER_CVAL,
+};
-#define NUM_TIMER_REGS 3
+#define NUM_TIMER_REGS ARRAY_SIZE(timer_reg_list)
static bool is_timer_reg(u64 index)
{
@@ -602,6 +607,9 @@ static bool is_timer_reg(u64 index)
case KVM_REG_ARM_TIMER_CTL:
case KVM_REG_ARM_TIMER_CNT:
case KVM_REG_ARM_TIMER_CVAL:
+ case KVM_REG_ARM_PTIMER_CTL:
+ case KVM_REG_ARM_PTIMER_CNT:
+ case KVM_REG_ARM_PTIMER_CVAL:
return true;
}
return false;
@@ -609,14 +617,11 @@ static bool is_timer_reg(u64 index)
static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
{
- if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
- return -EFAULT;
- uindices++;
- if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
- return -EFAULT;
- uindices++;
- if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
- return -EFAULT;
+ for (int i = 0; i < NUM_TIMER_REGS; i++) {
+ if (put_user(timer_reg_list[i], uindices))
+ return -EFAULT;
+ uindices++;
+ }
return 0;
}
@@ -957,7 +962,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
switch (attr->group) {
case KVM_ARM_VCPU_PMU_V3_CTRL:
+ mutex_lock(&vcpu->kvm->arch.config_lock);
ret = kvm_arm_pmu_v3_set_attr(vcpu, attr);
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
break;
case KVM_ARM_VCPU_TIMER_CTRL:
ret = kvm_arm_timer_set_attr(vcpu, attr);
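With the physical counter/timer registers now enumerated next to the virtual ones in timer_reg_list[] and copy_timer_indices(), userspace can discover them via KVM_GET_REG_LIST and access them through the usual one-reg interface. The following is an illustrative userspace sketch, not part of the patch; it assumes the KVM_REG_ARM_PTIMER_CNT definition exposed by this series and the standard KVM_GET_ONE_REG ioctl.

#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Illustrative only: read the guest's physical counter through the
 * one-reg API. Assumes vcpu_fd is an open vCPU file descriptor and
 * that the kernel exposes KVM_REG_ARM_PTIMER_CNT. */
static int read_ptimer_cnt(int vcpu_fd, uint64_t *cnt)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_ARM_PTIMER_CNT,
		.addr = (uint64_t)(uintptr_t)cnt,
	};

	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0) {
		perror("KVM_GET_ONE_REG");
		return -1;
	}
	return 0;
}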
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index a798c0b..6dcd660 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -36,8 +36,6 @@ static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
static int handle_hvc(struct kvm_vcpu *vcpu)
{
- int ret;
-
trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0),
kvm_vcpu_hvc_get_imm(vcpu));
vcpu->stat.hvc_exit_stat++;
@@ -52,33 +50,29 @@ static int handle_hvc(struct kvm_vcpu *vcpu)
return 1;
}
- ret = kvm_hvc_call_handler(vcpu);
- if (ret < 0) {
- vcpu_set_reg(vcpu, 0, ~0UL);
- return 1;
- }
-
- return ret;
+ return kvm_smccc_call_handler(vcpu);
}
static int handle_smc(struct kvm_vcpu *vcpu)
{
- int ret;
-
/*
* "If an SMC instruction executed at Non-secure EL1 is
* trapped to EL2 because HCR_EL2.TSC is 1, the exception is a
* Trap exception, not a Secure Monitor Call exception [...]"
*
* We need to advance the PC after the trap, as it would
- * otherwise return to the same address...
- *
- * Only handle SMCs from the virtual EL2 with an immediate of zero and
- * skip it otherwise.
+ * otherwise return to the same address. Furthermore, pre-incrementing
+ * the PC before potentially exiting to userspace maintains the same
+ * abstraction for both SMCs and HVCs.
*/
- if (!vcpu_is_el2(vcpu) || kvm_vcpu_hvc_get_imm(vcpu)) {
+ kvm_incr_pc(vcpu);
+
+ /*
+ * SMCs with a nonzero immediate are reserved according to DEN0028E 2.9
+ * "SMC and HVC immediate value".
+ */
+ if (kvm_vcpu_hvc_get_imm(vcpu)) {
vcpu_set_reg(vcpu, 0, ~0UL);
- kvm_incr_pc(vcpu);
return 1;
}
@@ -89,13 +83,7 @@ static int handle_smc(struct kvm_vcpu *vcpu)
* at Non-secure EL1 is trapped to EL2 if HCR_EL2.TSC==1, rather than
* being treated as UNDEFINED.
*/
- ret = kvm_hvc_call_handler(vcpu);
- if (ret < 0)
- vcpu_set_reg(vcpu, 0, ~0UL);
-
- kvm_incr_pc(vcpu);
-
- return ret;
+ return kvm_smccc_call_handler(vcpu);
}
/*
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 07d37ff..c41166f1 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -26,6 +26,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
+#include <asm/kvm_nested.h>
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
@@ -326,6 +327,55 @@ static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code)
return true;
}
+static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu)
+{
+ struct arch_timer_context *ctxt;
+ u32 sysreg;
+ u64 val;
+
+ /*
+ * We only get here for 64bit guests, 32bit guests will hit
+ * the long and winding road all the way to the standard
+ * handling. Yes, it sucks to be irrelevant.
+ */
+ sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
+
+ switch (sysreg) {
+ case SYS_CNTPCT_EL0:
+ case SYS_CNTPCTSS_EL0:
+ if (vcpu_has_nv(vcpu)) {
+ if (is_hyp_ctxt(vcpu)) {
+ ctxt = vcpu_hptimer(vcpu);
+ break;
+ }
+
+ /* Check for guest hypervisor trapping */
+ val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
+ if (!vcpu_el2_e2h_is_set(vcpu))
+ val = (val & CNTHCTL_EL1PCTEN) << 10;
+
+ if (!(val & (CNTHCTL_EL1PCTEN << 10)))
+ return false;
+ }
+
+ ctxt = vcpu_ptimer(vcpu);
+ break;
+ default:
+ return false;
+ }
+
+ val = arch_timer_read_cntpct_el0();
+
+ if (ctxt->offset.vm_offset)
+ val -= *kern_hyp_va(ctxt->offset.vm_offset);
+ if (ctxt->offset.vcpu_offset)
+ val -= *kern_hyp_va(ctxt->offset.vcpu_offset);
+
+ vcpu_set_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu), val);
+ __kvm_skip_instr(vcpu);
+ return true;
+}
+
static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
@@ -339,6 +389,9 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
return kvm_hyp_handle_ptrauth(vcpu, exit_code);
+ if (kvm_hyp_handle_cntpct(vcpu))
+ return true;
+
return false;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index 2673bde..d756b93 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -37,7 +37,6 @@ static void __debug_save_spe(u64 *pmscr_el1)
/* Now drain all buffered data to memory */
psb_csync();
- dsb(nsh);
}
static void __debug_restore_spe(u64 pmscr_el1)
@@ -69,7 +68,6 @@ static void __debug_save_trace(u64 *trfcr_el1)
isb();
/* Drain the trace buffer to memory */
tsb_csync();
- dsb(nsh);
}
static void __debug_restore_trace(u64 trfcr_el1)
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 552653f..2e9ec4a 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -297,6 +297,13 @@ int __pkvm_prot_finalize(void)
params->vttbr = kvm_get_vttbr(mmu);
params->vtcr = host_mmu.arch.vtcr;
params->hcr_el2 |= HCR_VM;
+
+ /*
+ * The CMO below not only cleans the updated params to the
+ * PoC, but also provides the DSB that ensures ongoing
+ * page-table walks that have started before we trapped to EL2
+ * have completed.
+ */
kvm_flush_dcache_to_poc(params, sizeof(*params));
write_sysreg(params->hcr_el2, hcr_el2);
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index c2cb46c..71fa16a 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -272,6 +272,17 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
*/
__debug_save_host_buffers_nvhe(vcpu);
+ /*
+ * We're about to restore some new MMU state. Make sure
+ * ongoing page-table walks that have started before we
+ * trapped to EL2 have completed. This also synchronises the
+ * above disabling of SPE and TRBE.
+ *
+ * See DDI0487I.a D8.1.5 "Out-of-context translation regimes",
+ * rule R_LFHQG and subsequent information statements.
+ */
+ dsb(nsh);
+
__kvm_adjust_pc(vcpu);
/*
@@ -306,6 +317,13 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
__timer_disable_traps(vcpu);
__hyp_vgic_save_state(vcpu);
+ /*
+ * Same thing as before the guest run: we're about to switch
+ * the MMU context, so let's make sure we don't have any
+ * ongoing EL1&0 translations.
+ */
+ dsb(nsh);
+
__deactivate_traps(vcpu);
__load_host_stage2();
diff --git a/arch/arm64/kvm/hyp/nvhe/timer-sr.c b/arch/arm64/kvm/hyp/nvhe/timer-sr.c
index 9072e71..b185ac0d 100644
--- a/arch/arm64/kvm/hyp/nvhe/timer-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/timer-sr.c
@@ -9,6 +9,7 @@
#include <linux/kvm_host.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
void __kvm_timer_set_cntvoff(u64 cntvoff)
{
@@ -35,14 +36,19 @@ void __timer_disable_traps(struct kvm_vcpu *vcpu)
*/
void __timer_enable_traps(struct kvm_vcpu *vcpu)
{
- u64 val;
+ u64 clr = 0, set = 0;
/*
* Disallow physical timer access for the guest
- * Physical counter access is allowed
+ * Physical counter access is allowed if no offset is enforced
+ * or running protected (we don't offset anything in this case).
*/
- val = read_sysreg(cnthctl_el2);
- val &= ~CNTHCTL_EL1PCEN;
- val |= CNTHCTL_EL1PCTEN;
- write_sysreg(val, cnthctl_el2);
+ clr = CNTHCTL_EL1PCEN;
+ if (is_protected_kvm_enabled() ||
+ !kern_hyp_va(vcpu->kvm)->arch.timer_data.poffset)
+ set |= CNTHCTL_EL1PCTEN;
+ else
+ clr |= CNTHCTL_EL1PCTEN;
+
+ sysreg_clear_set(cnthctl_el2, clr, set);
}
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index d296d61..9781791 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -15,8 +15,31 @@ struct tlb_inv_context {
};
static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
- struct tlb_inv_context *cxt)
+ struct tlb_inv_context *cxt,
+ bool nsh)
{
+ /*
+ * We have two requirements:
+ *
+ * - ensure that the page table updates are visible to all
+ * CPUs, for which a dsb(DOMAIN-st) is what we need, DOMAIN
+ * being either ish or nsh, depending on the invalidation
+ * type.
+ *
+ * - complete any speculative page table walk started before
+ * we trapped to EL2 so that we can mess with the MM
+ * registers out of context, for which dsb(nsh) is enough
+ *
+ * The composition of these two barriers is a dsb(DOMAIN), and
+ * the 'nsh' parameter tracks the distinction between
+ * Inner-Shareable and Non-Shareable, as specified by the
+ * callers.
+ */
+ if (nsh)
+ dsb(nsh);
+ else
+ dsb(ish);
+
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
u64 val;
@@ -60,10 +83,8 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
{
struct tlb_inv_context cxt;
- dsb(ishst);
-
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ __tlb_switch_to_guest(mmu, &cxt, false);
/*
* We could do so much better if we had the VA as well.
@@ -113,10 +134,8 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
{
struct tlb_inv_context cxt;
- dsb(ishst);
-
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ __tlb_switch_to_guest(mmu, &cxt, false);
__tlbi(vmalls12e1is);
dsb(ish);
@@ -130,7 +149,7 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ __tlb_switch_to_guest(mmu, &cxt, false);
__tlbi(vmalle1);
asm volatile("ic iallu");
@@ -142,7 +161,8 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
void __kvm_flush_vm_context(void)
{
- dsb(ishst);
+ /* Same remark as in __tlb_switch_to_guest() */
+ dsb(ish);
__tlbi(alle1is);
/*
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index cd3f311..3d868e8 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -227,11 +227,10 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
/*
* When we exit from the guest we change a number of CPU configuration
- * parameters, such as traps. Make sure these changes take effect
- * before running the host or additional guests.
+ * parameters, such as traps. We rely on the isb() in kvm_call_hyp*()
+ * to make sure these changes take effect before running the host or
+ * additional guests.
*/
- isb();
-
return ret;
}
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index 7b44f6b..b35a178 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -13,6 +13,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_nested.h>
/*
* VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and
@@ -70,6 +71,17 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu)
__sysreg_save_user_state(host_ctxt);
/*
+ * When running a normal EL1 guest, we only load a new vcpu
+ * after a context switch, which involves a DSB, so all
+ * speculative EL1&0 walks will have already completed.
+ * If running NV, the vcpu may transition between vEL1 and
+ * vEL2 without a context switch, so make sure we complete
+ * those walks before loading a new context.
+ */
+ if (vcpu_has_nv(vcpu))
+ dsb(nsh);
+
+ /*
* Load guest EL1 and user state
*
* We must restore the 32-bit state before the sysregs, thanks
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 5da884e..2e16fc7b 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -47,7 +47,7 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val)
cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.voffset;
break;
case KVM_PTP_PHYS_COUNTER:
- cycles = systime_snapshot.cycles;
+ cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.poffset;
break;
default:
return;
@@ -65,7 +65,7 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val)
val[3] = lower_32_bits(cycles);
}
-static bool kvm_hvc_call_default_allowed(u32 func_id)
+static bool kvm_smccc_default_allowed(u32 func_id)
{
switch (func_id) {
/*
@@ -93,7 +93,7 @@ static bool kvm_hvc_call_default_allowed(u32 func_id)
}
}
-static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id)
+static bool kvm_smccc_test_fw_bmap(struct kvm_vcpu *vcpu, u32 func_id)
{
struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat;
@@ -117,20 +117,161 @@ static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id)
return test_bit(KVM_REG_ARM_VENDOR_HYP_BIT_PTP,
&smccc_feat->vendor_hyp_bmap);
default:
- return kvm_hvc_call_default_allowed(func_id);
+ return false;
}
}
-int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
+#define SMC32_ARCH_RANGE_BEGIN ARM_SMCCC_VERSION_FUNC_ID
+#define SMC32_ARCH_RANGE_END ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_32, \
+ 0, ARM_SMCCC_FUNC_MASK)
+
+#define SMC64_ARCH_RANGE_BEGIN ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_64, \
+ 0, 0)
+#define SMC64_ARCH_RANGE_END ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_64, \
+ 0, ARM_SMCCC_FUNC_MASK)
+
+static void init_smccc_filter(struct kvm *kvm)
+{
+ int r;
+
+ mt_init(&kvm->arch.smccc_filter);
+
+ /*
+ * Prevent userspace from handling any SMCCC calls in the architecture
+ * range, avoiding the risk of misrepresenting Spectre mitigation status
+ * to the guest.
+ */
+ r = mtree_insert_range(&kvm->arch.smccc_filter,
+ SMC32_ARCH_RANGE_BEGIN, SMC32_ARCH_RANGE_END,
+ xa_mk_value(KVM_SMCCC_FILTER_HANDLE),
+ GFP_KERNEL_ACCOUNT);
+ WARN_ON_ONCE(r);
+
+ r = mtree_insert_range(&kvm->arch.smccc_filter,
+ SMC64_ARCH_RANGE_BEGIN, SMC64_ARCH_RANGE_END,
+ xa_mk_value(KVM_SMCCC_FILTER_HANDLE),
+ GFP_KERNEL_ACCOUNT);
+ WARN_ON_ONCE(r);
+
+}
+
+static int kvm_smccc_set_filter(struct kvm *kvm, struct kvm_smccc_filter __user *uaddr)
+{
+ const void *zero_page = page_to_virt(ZERO_PAGE(0));
+ struct kvm_smccc_filter filter;
+ u32 start, end;
+ int r;
+
+ if (copy_from_user(&filter, uaddr, sizeof(filter)))
+ return -EFAULT;
+
+ if (memcmp(filter.pad, zero_page, sizeof(filter.pad)))
+ return -EINVAL;
+
+ start = filter.base;
+ end = start + filter.nr_functions - 1;
+
+ if (end < start || filter.action >= NR_SMCCC_FILTER_ACTIONS)
+ return -EINVAL;
+
+ mutex_lock(&kvm->arch.config_lock);
+
+ if (kvm_vm_has_ran_once(kvm)) {
+ r = -EBUSY;
+ goto out_unlock;
+ }
+
+ r = mtree_insert_range(&kvm->arch.smccc_filter, start, end,
+ xa_mk_value(filter.action), GFP_KERNEL_ACCOUNT);
+ if (r)
+ goto out_unlock;
+
+ set_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags);
+
+out_unlock:
+ mutex_unlock(&kvm->arch.config_lock);
+ return r;
+}
+
+static u8 kvm_smccc_filter_get_action(struct kvm *kvm, u32 func_id)
+{
+ unsigned long idx = func_id;
+ void *val;
+
+ if (!test_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags))
+ return KVM_SMCCC_FILTER_HANDLE;
+
+ /*
+ * But where's the error handling, you say?
+ *
+ * mt_find() returns NULL if no entry was found, which just so happens
+ * to match KVM_SMCCC_FILTER_HANDLE.
+ */
+ val = mt_find(&kvm->arch.smccc_filter, &idx, idx);
+ return xa_to_value(val);
+}
+
+static u8 kvm_smccc_get_action(struct kvm_vcpu *vcpu, u32 func_id)
+{
+ /*
+ * Intervening actions in the SMCCC filter take precedence over the
+ * pseudo-firmware register bitmaps.
+ */
+ u8 action = kvm_smccc_filter_get_action(vcpu->kvm, func_id);
+ if (action != KVM_SMCCC_FILTER_HANDLE)
+ return action;
+
+ if (kvm_smccc_test_fw_bmap(vcpu, func_id) ||
+ kvm_smccc_default_allowed(func_id))
+ return KVM_SMCCC_FILTER_HANDLE;
+
+ return KVM_SMCCC_FILTER_DENY;
+}
+
+static void kvm_prepare_hypercall_exit(struct kvm_vcpu *vcpu, u32 func_id)
+{
+ u8 ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu));
+ struct kvm_run *run = vcpu->run;
+ u64 flags = 0;
+
+ if (ec == ESR_ELx_EC_SMC32 || ec == ESR_ELx_EC_SMC64)
+ flags |= KVM_HYPERCALL_EXIT_SMC;
+
+ if (!kvm_vcpu_trap_il_is32bit(vcpu))
+ flags |= KVM_HYPERCALL_EXIT_16BIT;
+
+ run->exit_reason = KVM_EXIT_HYPERCALL;
+ run->hypercall = (typeof(run->hypercall)) {
+ .nr = func_id,
+ .flags = flags,
+ };
+}
+
+int kvm_smccc_call_handler(struct kvm_vcpu *vcpu)
{
struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat;
u32 func_id = smccc_get_function(vcpu);
u64 val[4] = {SMCCC_RET_NOT_SUPPORTED};
u32 feature;
+ u8 action;
gpa_t gpa;
- if (!kvm_hvc_call_allowed(vcpu, func_id))
+ action = kvm_smccc_get_action(vcpu, func_id);
+ switch (action) {
+ case KVM_SMCCC_FILTER_HANDLE:
+ break;
+ case KVM_SMCCC_FILTER_DENY:
goto out;
+ case KVM_SMCCC_FILTER_FWD_TO_USER:
+ kvm_prepare_hypercall_exit(vcpu, func_id);
+ return 0;
+ default:
+ WARN_RATELIMIT(1, "Unhandled SMCCC filter action: %d\n", action);
+ goto out;
+ }
switch (func_id) {
case ARM_SMCCC_VERSION_FUNC_ID:
@@ -245,6 +386,13 @@ void kvm_arm_init_hypercalls(struct kvm *kvm)
smccc_feat->std_bmap = KVM_ARM_SMCCC_STD_FEATURES;
smccc_feat->std_hyp_bmap = KVM_ARM_SMCCC_STD_HYP_FEATURES;
smccc_feat->vendor_hyp_bmap = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES;
+
+ init_smccc_filter(kvm);
+}
+
+void kvm_arm_teardown_hypercalls(struct kvm *kvm)
+{
+ mtree_destroy(&kvm->arch.smccc_filter);
}
int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
@@ -377,17 +525,16 @@ static int kvm_arm_set_fw_reg_bmap(struct kvm_vcpu *vcpu, u64 reg_id, u64 val)
if (val & ~fw_reg_features)
return -EINVAL;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
- if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) &&
- val != *fw_reg_bmap) {
+ if (kvm_vm_has_ran_once(kvm) && val != *fw_reg_bmap) {
ret = -EBUSY;
goto out;
}
WRITE_ONCE(*fw_reg_bmap, val);
out:
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.config_lock);
return ret;
}
@@ -479,3 +626,25 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
return -EINVAL;
}
+
+int kvm_vm_smccc_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ switch (attr->attr) {
+ case KVM_ARM_VM_SMCCC_FILTER:
+ return 0;
+ default:
+ return -ENXIO;
+ }
+}
+
+int kvm_vm_smccc_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ void __user *uaddr = (void __user *)attr->addr;
+
+ switch (attr->attr) {
+ case KVM_ARM_VM_SMCCC_FILTER:
+ return kvm_smccc_set_filter(kvm, uaddr);
+ default:
+ return -ENXIO;
+ }
+}
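For context, the SMCCC filter added above is driven from userspace through the VM-scoped device attribute interface (KVM_SET_DEVICE_ATTR on the VM fd). Below is a minimal, illustrative sketch, not part of the patch, of how a VMM might forward a function range to itself; it assumes the uapi names introduced by this series (struct kvm_smccc_filter, KVM_ARM_VM_SMCCC_CTRL, KVM_ARM_VM_SMCCC_FILTER, KVM_SMCCC_FILTER_FWD_TO_USER). As the code above shows, the filter must be set before any vCPU has run (-EBUSY otherwise), and ranges overlapping the pre-reserved architecture range are rejected.

#include <string.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Illustrative only: route func_base..func_base+n-1 to userspace.
 * vm_fd is the VM file descriptor. */
static int set_smccc_fwd(int vm_fd, uint32_t func_base, uint32_t n)
{
	struct kvm_smccc_filter filter;
	struct kvm_device_attr attr = {
		.group = KVM_ARM_VM_SMCCC_CTRL,
		.attr  = KVM_ARM_VM_SMCCC_FILTER,
		.addr  = (uint64_t)(uintptr_t)&filter,
	};

	memset(&filter, 0, sizeof(filter));	/* pad[] must be zero */
	filter.base         = func_base;
	filter.nr_functions = n;
	filter.action       = KVM_SMCCC_FILTER_FWD_TO_USER;

	return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
}

When a filtered call is made by the guest, the vCPU exits with KVM_EXIT_HYPERCALL and kvm_prepare_hypercall_exit() fills run->hypercall with the function ID and the SMC/register-width flags shown above.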
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 2490840..8402e5a 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -874,13 +874,13 @@ static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
struct arm_pmu *arm_pmu;
int ret = -ENXIO;
- mutex_lock(&kvm->lock);
+ lockdep_assert_held(&kvm->arch.config_lock);
mutex_lock(&arm_pmus_lock);
list_for_each_entry(entry, &arm_pmus, entry) {
arm_pmu = entry->arm_pmu;
if (arm_pmu->pmu.type == pmu_id) {
- if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) ||
+ if (kvm_vm_has_ran_once(kvm) ||
(kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) {
ret = -EBUSY;
break;
@@ -894,7 +894,6 @@ static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
}
mutex_unlock(&arm_pmus_lock);
- mutex_unlock(&kvm->lock);
return ret;
}
@@ -902,22 +901,20 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
struct kvm *kvm = vcpu->kvm;
+ lockdep_assert_held(&kvm->arch.config_lock);
+
if (!kvm_vcpu_has_pmu(vcpu))
return -ENODEV;
if (vcpu->arch.pmu.created)
return -EBUSY;
- mutex_lock(&kvm->lock);
if (!kvm->arch.arm_pmu) {
/* No PMU set, get the default one */
kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
- if (!kvm->arch.arm_pmu) {
- mutex_unlock(&kvm->lock);
+ if (!kvm->arch.arm_pmu)
return -ENODEV;
- }
}
- mutex_unlock(&kvm->lock);
switch (attr->attr) {
case KVM_ARM_VCPU_PMU_V3_IRQ: {
@@ -961,19 +958,13 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
filter.action != KVM_PMU_EVENT_DENY))
return -EINVAL;
- mutex_lock(&kvm->lock);
-
- if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) {
- mutex_unlock(&kvm->lock);
+ if (kvm_vm_has_ran_once(kvm))
return -EBUSY;
- }
if (!kvm->arch.pmu_filter) {
kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
- if (!kvm->arch.pmu_filter) {
- mutex_unlock(&kvm->lock);
+ if (!kvm->arch.pmu_filter)
return -ENOMEM;
- }
/*
* The default depends on the first applied filter.
@@ -992,8 +983,6 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
else
bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
- mutex_unlock(&kvm->lock);
-
return 0;
}
case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c
index 7fbc4c1..1f69b66 100644
--- a/arch/arm64/kvm/psci.c
+++ b/arch/arm64/kvm/psci.c
@@ -62,6 +62,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
struct vcpu_reset_state *reset_state;
struct kvm *kvm = source_vcpu->kvm;
struct kvm_vcpu *vcpu = NULL;
+ int ret = PSCI_RET_SUCCESS;
unsigned long cpu_id;
cpu_id = smccc_get_arg1(source_vcpu);
@@ -76,11 +77,15 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
*/
if (!vcpu)
return PSCI_RET_INVALID_PARAMS;
+
+ spin_lock(&vcpu->arch.mp_state_lock);
if (!kvm_arm_vcpu_stopped(vcpu)) {
if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
- return PSCI_RET_ALREADY_ON;
+ ret = PSCI_RET_ALREADY_ON;
else
- return PSCI_RET_INVALID_PARAMS;
+ ret = PSCI_RET_INVALID_PARAMS;
+
+ goto out_unlock;
}
reset_state = &vcpu->arch.reset_state;
@@ -96,7 +101,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
*/
reset_state->r0 = smccc_get_arg3(source_vcpu);
- WRITE_ONCE(reset_state->reset, true);
+ reset_state->reset = true;
kvm_make_request(KVM_REQ_VCPU_RESET, vcpu);
/*
@@ -105,10 +110,12 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
*/
smp_wmb();
- vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE;
+ WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
kvm_vcpu_wake_up(vcpu);
- return PSCI_RET_SUCCESS;
+out_unlock:
+ spin_unlock(&vcpu->arch.mp_state_lock);
+ return ret;
}
static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
@@ -168,8 +175,11 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags)
* after this call is handled and before the VCPUs have been
* re-initialized.
*/
- kvm_for_each_vcpu(i, tmp, vcpu->kvm)
- tmp->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+ spin_lock(&tmp->arch.mp_state_lock);
+ WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
+ spin_unlock(&tmp->arch.mp_state_lock);
+ }
kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
@@ -229,7 +239,6 @@ static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32
static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
{
- struct kvm *kvm = vcpu->kvm;
u32 psci_fn = smccc_get_function(vcpu);
unsigned long val;
int ret = 1;
@@ -254,9 +263,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
kvm_psci_narrow_to_32bit(vcpu);
fallthrough;
case PSCI_0_2_FN64_CPU_ON:
- mutex_lock(&kvm->lock);
val = kvm_psci_vcpu_on(vcpu);
- mutex_unlock(&kvm->lock);
break;
case PSCI_0_2_FN_AFFINITY_INFO:
kvm_psci_narrow_to_32bit(vcpu);
@@ -395,7 +402,6 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor)
static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
{
- struct kvm *kvm = vcpu->kvm;
u32 psci_fn = smccc_get_function(vcpu);
unsigned long val;
@@ -405,9 +411,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
val = PSCI_RET_SUCCESS;
break;
case KVM_PSCI_FN_CPU_ON:
- mutex_lock(&kvm->lock);
val = kvm_psci_vcpu_on(vcpu);
- mutex_unlock(&kvm->lock);
break;
default:
val = PSCI_RET_NOT_SUPPORTED;
@@ -435,6 +439,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
int kvm_psci_call(struct kvm_vcpu *vcpu)
{
u32 psci_fn = smccc_get_function(vcpu);
+ int version = kvm_psci_version(vcpu);
unsigned long val;
val = kvm_psci_check_allowed_function(vcpu, psci_fn);
@@ -443,7 +448,7 @@ int kvm_psci_call(struct kvm_vcpu *vcpu)
return 1;
}
- switch (kvm_psci_version(vcpu)) {
+ switch (version) {
case KVM_ARM_PSCI_1_1:
return kvm_psci_1_x_call(vcpu, 1);
case KVM_ARM_PSCI_1_0:
@@ -453,6 +458,8 @@ int kvm_psci_call(struct kvm_vcpu *vcpu)
case KVM_ARM_PSCI_0_1:
return kvm_psci_0_1_call(vcpu);
default:
- return -EINVAL;
+ WARN_ONCE(1, "Unknown PSCI version %d", version);
+ smccc_set_retval(vcpu, SMCCC_RET_NOT_SUPPORTED, 0, 0, 0);
+ return 1;
}
}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 49a3257..b5dee8e 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -205,7 +205,7 @@ static int kvm_set_vm_width(struct kvm_vcpu *vcpu)
is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
- lockdep_assert_held(&kvm->lock);
+ lockdep_assert_held(&kvm->arch.config_lock);
if (test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, &kvm->arch.flags)) {
/*
@@ -262,17 +262,18 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
bool loaded;
u32 pstate;
- mutex_lock(&vcpu->kvm->lock);
+ mutex_lock(&vcpu->kvm->arch.config_lock);
ret = kvm_set_vm_width(vcpu);
- if (!ret) {
- reset_state = vcpu->arch.reset_state;
- WRITE_ONCE(vcpu->arch.reset_state.reset, false);
- }
- mutex_unlock(&vcpu->kvm->lock);
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
if (ret)
return ret;
+ spin_lock(&vcpu->arch.mp_state_lock);
+ reset_state = vcpu->arch.reset_state;
+ vcpu->arch.reset_state.reset = false;
+ spin_unlock(&vcpu->arch.mp_state_lock);
+
/* Reset PMU outside of the non-preemptible section */
kvm_pmu_vcpu_reset(vcpu);
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 53749d3..feca770 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1139,6 +1139,12 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
tmr = TIMER_PTIMER;
treg = TIMER_REG_CVAL;
break;
+ case SYS_CNTPCT_EL0:
+ case SYS_CNTPCTSS_EL0:
+ case SYS_AARCH32_CNTPCT:
+ tmr = TIMER_PTIMER;
+ treg = TIMER_REG_CNT;
+ break;
default:
print_sys_reg_msg(p, "%s", "Unhandled trapped timer register");
kvm_inject_undefined(vcpu);
@@ -2075,6 +2081,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
AMU_AMEVTYPER1_EL0(14),
AMU_AMEVTYPER1_EL0(15),
+ { SYS_DESC(SYS_CNTPCT_EL0), access_arch_timer },
+ { SYS_DESC(SYS_CNTPCTSS_EL0), access_arch_timer },
{ SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer },
{ SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer },
{ SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer },
@@ -2525,10 +2533,12 @@ static const struct sys_reg_desc cp15_64_regs[] = {
{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 },
{ CP15_PMU_SYS_REG(DIRECT, 0, 0, 9, 0), .access = access_pmu_evcntr },
{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */
+ { SYS_DESC(SYS_AARCH32_CNTPCT), access_arch_timer },
{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR1_EL1 },
{ Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */
{ Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */
{ SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer },
+ { SYS_DESC(SYS_AARCH32_CNTPCTSS), access_arch_timer },
};
static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h
index f3e46a9..6ce5c02 100644
--- a/arch/arm64/kvm/trace_arm.h
+++ b/arch/arm64/kvm/trace_arm.h
@@ -206,6 +206,7 @@ TRACE_EVENT(kvm_get_timer_map,
__field( unsigned long, vcpu_id )
__field( int, direct_vtimer )
__field( int, direct_ptimer )
+ __field( int, emul_vtimer )
__field( int, emul_ptimer )
),
@@ -214,14 +215,17 @@ TRACE_EVENT(kvm_get_timer_map,
__entry->direct_vtimer = arch_timer_ctx_index(map->direct_vtimer);
__entry->direct_ptimer =
(map->direct_ptimer) ? arch_timer_ctx_index(map->direct_ptimer) : -1;
+ __entry->emul_vtimer =
+ (map->emul_vtimer) ? arch_timer_ctx_index(map->emul_vtimer) : -1;
__entry->emul_ptimer =
(map->emul_ptimer) ? arch_timer_ctx_index(map->emul_ptimer) : -1;
),
- TP_printk("VCPU: %ld, dv: %d, dp: %d, ep: %d",
+ TP_printk("VCPU: %ld, dv: %d, dp: %d, ev: %d, ep: %d",
__entry->vcpu_id,
__entry->direct_vtimer,
__entry->direct_ptimer,
+ __entry->emul_vtimer,
__entry->emul_ptimer)
);
diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c
index 78cde68..07aa043 100644
--- a/arch/arm64/kvm/vgic/vgic-debug.c
+++ b/arch/arm64/kvm/vgic/vgic-debug.c
@@ -85,7 +85,7 @@ static void *vgic_debug_start(struct seq_file *s, loff_t *pos)
struct kvm *kvm = s->private;
struct vgic_state_iter *iter;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
iter = kvm->arch.vgic.iter;
if (iter) {
iter = ERR_PTR(-EBUSY);
@@ -104,7 +104,7 @@ static void *vgic_debug_start(struct seq_file *s, loff_t *pos)
if (end_of_vgic(iter))
iter = NULL;
out:
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.config_lock);
return iter;
}
@@ -132,12 +132,12 @@ static void vgic_debug_stop(struct seq_file *s, void *v)
if (IS_ERR(v))
return;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
iter = kvm->arch.vgic.iter;
kfree(iter->lpi_array);
kfree(iter);
kvm->arch.vgic.iter = NULL;
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.config_lock);
}
static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index cd134db..9d42c7c 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -74,9 +74,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
unsigned long i;
int ret;
- if (irqchip_in_kernel(kvm))
- return -EEXIST;
-
/*
* This function is also called by the KVM_CREATE_IRQCHIP handler,
* which had no chance yet to check the availability of the GICv2
@@ -87,10 +84,20 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
!kvm_vgic_global_state.can_emulate_gicv2)
return -ENODEV;
+ /* Must be held to avoid race with vCPU creation */
+ lockdep_assert_held(&kvm->lock);
+
ret = -EBUSY;
if (!lock_all_vcpus(kvm))
return ret;
+ mutex_lock(&kvm->arch.config_lock);
+
+ if (irqchip_in_kernel(kvm)) {
+ ret = -EEXIST;
+ goto out_unlock;
+ }
+
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu_has_run_once(vcpu))
goto out_unlock;
@@ -118,6 +125,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions);
out_unlock:
+ mutex_unlock(&kvm->arch.config_lock);
unlock_all_vcpus(kvm);
return ret;
}
@@ -227,9 +235,9 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
* KVM io device for the redistributor that belongs to this VCPU.
*/
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
- mutex_lock(&vcpu->kvm->lock);
+ mutex_lock(&vcpu->kvm->arch.config_lock);
ret = vgic_register_redist_iodev(vcpu);
- mutex_unlock(&vcpu->kvm->lock);
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
}
return ret;
}
@@ -250,7 +258,6 @@ static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu)
* The function is generally called when nr_spis has been explicitly set
* by the guest through the KVM DEVICE API. If not, nr_spis is set to 256.
* vgic_initialized() returns true when this function has succeeded.
- * Must be called with kvm->lock held!
*/
int vgic_init(struct kvm *kvm)
{
@@ -259,6 +266,8 @@ int vgic_init(struct kvm *kvm)
int ret = 0, i;
unsigned long idx;
+ lockdep_assert_held(&kvm->arch.config_lock);
+
if (vgic_initialized(kvm))
return 0;
@@ -373,12 +382,13 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
}
-/* To be called with kvm->lock held */
static void __kvm_vgic_destroy(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
unsigned long i;
+ lockdep_assert_held(&kvm->arch.config_lock);
+
vgic_debug_destroy(kvm);
kvm_for_each_vcpu(i, vcpu, kvm)
@@ -389,9 +399,9 @@ static void __kvm_vgic_destroy(struct kvm *kvm)
void kvm_vgic_destroy(struct kvm *kvm)
{
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
__kvm_vgic_destroy(kvm);
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.config_lock);
}
/**
@@ -414,9 +424,9 @@ int vgic_lazy_init(struct kvm *kvm)
if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2)
return -EBUSY;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
ret = vgic_init(kvm);
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.config_lock);
}
return ret;
@@ -441,7 +451,7 @@ int kvm_vgic_map_resources(struct kvm *kvm)
if (likely(vgic_ready(kvm)))
return 0;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
if (vgic_ready(kvm))
goto out;
@@ -459,7 +469,7 @@ int kvm_vgic_map_resources(struct kvm *kvm)
dist->ready = true;
out:
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.config_lock);
return ret;
}
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 2642e9c..750e51e 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -1958,6 +1958,16 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
mutex_init(&its->its_lock);
mutex_init(&its->cmd_lock);
+ /* Yep, even more trickery for lock ordering... */
+#ifdef CONFIG_LOCKDEP
+ mutex_lock(&dev->kvm->arch.config_lock);
+ mutex_lock(&its->cmd_lock);
+ mutex_lock(&its->its_lock);
+ mutex_unlock(&its->its_lock);
+ mutex_unlock(&its->cmd_lock);
+ mutex_unlock(&dev->kvm->arch.config_lock);
+#endif
+
its->vgic_its_base = VGIC_ADDR_UNDEF;
INIT_LIST_HEAD(&its->device_list);
@@ -2045,6 +2055,13 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev,
mutex_lock(&dev->kvm->lock);
+ if (!lock_all_vcpus(dev->kvm)) {
+ mutex_unlock(&dev->kvm->lock);
+ return -EBUSY;
+ }
+
+ mutex_lock(&dev->kvm->arch.config_lock);
+
if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) {
ret = -ENXIO;
goto out;
@@ -2058,11 +2075,6 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev,
goto out;
}
- if (!lock_all_vcpus(dev->kvm)) {
- ret = -EBUSY;
- goto out;
- }
-
addr = its->vgic_its_base + offset;
len = region->access_flags & VGIC_ACCESS_64bit ? 8 : 4;
@@ -2076,8 +2088,9 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev,
} else {
*reg = region->its_read(dev->kvm, its, addr, len);
}
- unlock_all_vcpus(dev->kvm);
out:
+ mutex_unlock(&dev->kvm->arch.config_lock);
+ unlock_all_vcpus(dev->kvm);
mutex_unlock(&dev->kvm->lock);
return ret;
}
@@ -2749,14 +2762,15 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
return 0;
mutex_lock(&kvm->lock);
- mutex_lock(&its->its_lock);
if (!lock_all_vcpus(kvm)) {
- mutex_unlock(&its->its_lock);
mutex_unlock(&kvm->lock);
return -EBUSY;
}
+ mutex_lock(&kvm->arch.config_lock);
+ mutex_lock(&its->its_lock);
+
switch (attr) {
case KVM_DEV_ARM_ITS_CTRL_RESET:
vgic_its_reset(kvm, its);
@@ -2769,8 +2783,9 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
break;
}
- unlock_all_vcpus(kvm);
mutex_unlock(&its->its_lock);
+ mutex_unlock(&kvm->arch.config_lock);
+ unlock_all_vcpus(kvm);
mutex_unlock(&kvm->lock);
return ret;
}
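The reshuffling above follows the lock hierarchy documented later in vgic.c: kvm->lock, then every vcpu->mutex, then kvm->arch.config_lock, then the ITS mutexes. A condensed sketch of the pattern a device ioctl now follows, mirroring vgic_its_ctrl() above (illustrative only, error handling trimmed):

/* Lock ordering after this series:
 *   kvm->lock  ->  all vcpu->mutex  ->  kvm->arch.config_lock  ->  ITS locks
 */
static int example_device_op(struct kvm *kvm, struct vgic_its *its)
{
	int ret = 0;

	mutex_lock(&kvm->lock);

	if (!lock_all_vcpus(kvm)) {		/* trylock every vcpu->mutex */
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}

	mutex_lock(&kvm->arch.config_lock);
	mutex_lock(&its->its_lock);

	/* ... operate on ITS/vgic state ... */

	mutex_unlock(&its->its_lock);
	mutex_unlock(&kvm->arch.config_lock);
	unlock_all_vcpus(kvm);			/* reverse order on the way out */
	mutex_unlock(&kvm->lock);
	return ret;
}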
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index edeac23..35cfa26 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -46,7 +46,7 @@ int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev
struct vgic_dist *vgic = &kvm->arch.vgic;
int r;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
switch (FIELD_GET(KVM_ARM_DEVICE_TYPE_MASK, dev_addr->id)) {
case KVM_VGIC_V2_ADDR_TYPE_DIST:
r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
@@ -68,7 +68,7 @@ int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev
r = -ENODEV;
}
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.config_lock);
return r;
}
@@ -102,7 +102,7 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri
if (get_user(addr, uaddr))
return -EFAULT;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
switch (attr->attr) {
case KVM_VGIC_V2_ADDR_TYPE_DIST:
r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
@@ -191,7 +191,7 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri
}
out:
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.config_lock);
if (!r && !write)
r = put_user(addr, uaddr);
@@ -227,7 +227,7 @@ static int vgic_set_common_attr(struct kvm_device *dev,
(val & 31))
return -EINVAL;
- mutex_lock(&dev->kvm->lock);
+ mutex_lock(&dev->kvm->arch.config_lock);
if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_spis)
ret = -EBUSY;
@@ -235,16 +235,16 @@ static int vgic_set_common_attr(struct kvm_device *dev,
dev->kvm->arch.vgic.nr_spis =
val - VGIC_NR_PRIVATE_IRQS;
- mutex_unlock(&dev->kvm->lock);
+ mutex_unlock(&dev->kvm->arch.config_lock);
return ret;
}
case KVM_DEV_ARM_VGIC_GRP_CTRL: {
switch (attr->attr) {
case KVM_DEV_ARM_VGIC_CTRL_INIT:
- mutex_lock(&dev->kvm->lock);
+ mutex_lock(&dev->kvm->arch.config_lock);
r = vgic_init(dev->kvm);
- mutex_unlock(&dev->kvm->lock);
+ mutex_unlock(&dev->kvm->arch.config_lock);
return r;
case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES:
/*
@@ -260,7 +260,10 @@ static int vgic_set_common_attr(struct kvm_device *dev,
mutex_unlock(&dev->kvm->lock);
return -EBUSY;
}
+
+ mutex_lock(&dev->kvm->arch.config_lock);
r = vgic_v3_save_pending_tables(dev->kvm);
+ mutex_unlock(&dev->kvm->arch.config_lock);
unlock_all_vcpus(dev->kvm);
mutex_unlock(&dev->kvm->lock);
return r;
@@ -342,44 +345,6 @@ int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
return 0;
}
-/* unlocks vcpus from @vcpu_lock_idx and smaller */
-static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx)
-{
- struct kvm_vcpu *tmp_vcpu;
-
- for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
- tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
- mutex_unlock(&tmp_vcpu->mutex);
- }
-}
-
-void unlock_all_vcpus(struct kvm *kvm)
-{
- unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1);
-}
-
-/* Returns true if all vcpus were locked, false otherwise */
-bool lock_all_vcpus(struct kvm *kvm)
-{
- struct kvm_vcpu *tmp_vcpu;
- unsigned long c;
-
- /*
- * Any time a vcpu is run, vcpu_load is called which tries to grab the
- * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure
- * that no other VCPUs are run and fiddle with the vgic state while we
- * access it.
- */
- kvm_for_each_vcpu(c, tmp_vcpu, kvm) {
- if (!mutex_trylock(&tmp_vcpu->mutex)) {
- unlock_vcpus(kvm, c - 1);
- return false;
- }
- }
-
- return true;
-}
-
/**
* vgic_v2_attr_regs_access - allows user space to access VGIC v2 state
*
@@ -411,15 +376,17 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev,
mutex_lock(&dev->kvm->lock);
+ if (!lock_all_vcpus(dev->kvm)) {
+ mutex_unlock(&dev->kvm->lock);
+ return -EBUSY;
+ }
+
+ mutex_lock(&dev->kvm->arch.config_lock);
+
ret = vgic_init(dev->kvm);
if (ret)
goto out;
- if (!lock_all_vcpus(dev->kvm)) {
- ret = -EBUSY;
- goto out;
- }
-
switch (attr->group) {
case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, &val);
@@ -432,8 +399,9 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev,
break;
}
- unlock_all_vcpus(dev->kvm);
out:
+ mutex_unlock(&dev->kvm->arch.config_lock);
+ unlock_all_vcpus(dev->kvm);
mutex_unlock(&dev->kvm->lock);
if (!ret && !is_write)
@@ -569,12 +537,14 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
mutex_lock(&dev->kvm->lock);
- if (unlikely(!vgic_initialized(dev->kvm))) {
- ret = -EBUSY;
- goto out;
+ if (!lock_all_vcpus(dev->kvm)) {
+ mutex_unlock(&dev->kvm->lock);
+ return -EBUSY;
}
- if (!lock_all_vcpus(dev->kvm)) {
+ mutex_lock(&dev->kvm->arch.config_lock);
+
+ if (unlikely(!vgic_initialized(dev->kvm))) {
ret = -EBUSY;
goto out;
}
@@ -609,8 +579,9 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
break;
}
- unlock_all_vcpus(dev->kvm);
out:
+ mutex_unlock(&dev->kvm->arch.config_lock);
+ unlock_all_vcpus(dev->kvm);
mutex_unlock(&dev->kvm->lock);
if (!ret && uaccess && !is_write) {
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index 91201f7..472b18a 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -111,7 +111,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
case GICD_CTLR: {
bool was_enabled, is_hwsgi;
- mutex_lock(&vcpu->kvm->lock);
+ mutex_lock(&vcpu->kvm->arch.config_lock);
was_enabled = dist->enabled;
is_hwsgi = dist->nassgireq;
@@ -139,7 +139,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
else if (!was_enabled && dist->enabled)
vgic_kick_vcpus(vcpu->kvm);
- mutex_unlock(&vcpu->kvm->lock);
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
break;
}
case GICD_TYPER:
diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c
index e67b3b2..1939c94 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio.c
@@ -530,13 +530,13 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
u32 val;
- mutex_lock(&vcpu->kvm->lock);
+ mutex_lock(&vcpu->kvm->arch.config_lock);
vgic_access_active_prepare(vcpu, intid);
val = __vgic_mmio_read_active(vcpu, addr, len);
vgic_access_active_finish(vcpu, intid);
- mutex_unlock(&vcpu->kvm->lock);
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
return val;
}
@@ -625,13 +625,13 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- mutex_lock(&vcpu->kvm->lock);
+ mutex_lock(&vcpu->kvm->arch.config_lock);
vgic_access_active_prepare(vcpu, intid);
__vgic_mmio_write_cactive(vcpu, addr, len, val);
vgic_access_active_finish(vcpu, intid);
- mutex_unlock(&vcpu->kvm->lock);
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
}
int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
@@ -662,13 +662,13 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- mutex_lock(&vcpu->kvm->lock);
+ mutex_lock(&vcpu->kvm->arch.config_lock);
vgic_access_active_prepare(vcpu, intid);
__vgic_mmio_write_sactive(vcpu, addr, len, val);
vgic_access_active_finish(vcpu, intid);
- mutex_unlock(&vcpu->kvm->lock);
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
}
int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index a413718..3bb0034 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -232,9 +232,8 @@ int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq)
* @kvm: Pointer to the VM being initialized
*
* We may be called each time a vITS is created, or when the
- * vgic is initialized. This relies on kvm->lock to be
- * held. In both cases, the number of vcpus should now be
- * fixed.
+ * vgic is initialized. In both cases, the number of vcpus
+ * should now be fixed.
*/
int vgic_v4_init(struct kvm *kvm)
{
@@ -243,6 +242,8 @@ int vgic_v4_init(struct kvm *kvm)
int nr_vcpus, ret;
unsigned long i;
+ lockdep_assert_held(&kvm->arch.config_lock);
+
if (!kvm_vgic_global_state.has_gicv4)
return 0; /* Nothing to see here... move along. */
@@ -309,14 +310,14 @@ int vgic_v4_init(struct kvm *kvm)
/**
* vgic_v4_teardown - Free the GICv4 data structures
* @kvm: Pointer to the VM being destroyed
- *
- * Relies on kvm->lock to be held.
*/
void vgic_v4_teardown(struct kvm *kvm)
{
struct its_vm *its_vm = &kvm->arch.vgic.its_vm;
int i;
+ lockdep_assert_held(&kvm->arch.config_lock);
+
if (!its_vm->vpes)
return;
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index d97e6080..8be4c1e 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -24,11 +24,13 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
/*
* Locking order is always:
* kvm->lock (mutex)
- * its->cmd_lock (mutex)
- * its->its_lock (mutex)
- * vgic_cpu->ap_list_lock must be taken with IRQs disabled
- * kvm->lpi_list_lock must be taken with IRQs disabled
- * vgic_irq->irq_lock must be taken with IRQs disabled
+ * vcpu->mutex (mutex)
+ * kvm->arch.config_lock (mutex)
+ * its->cmd_lock (mutex)
+ * its->its_lock (mutex)
+ * vgic_cpu->ap_list_lock must be taken with IRQs disabled
+ * kvm->lpi_list_lock must be taken with IRQs disabled
+ * vgic_irq->irq_lock must be taken with IRQs disabled
*
* As the ap_list_lock might be taken from the timer interrupt handler,
* we have to disable IRQs before taking this lock and everything lower
@@ -573,6 +575,21 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
return 0;
}
+int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid)
+{
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
+ unsigned long flags;
+ int ret = -1;
+
+ raw_spin_lock_irqsave(&irq->irq_lock, flags);
+ if (irq->hw)
+ ret = irq->hwintid;
+ raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+
+ vgic_put_irq(vcpu->kvm, irq);
+ return ret;
+}
+
/**
* kvm_vgic_set_owner - Set the owner of an interrupt for a VM
*
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 7f7f3c5..f9923be 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -273,9 +273,6 @@ int vgic_init(struct kvm *kvm);
void vgic_debug_init(struct kvm *kvm);
void vgic_debug_destroy(struct kvm *kvm);
-bool lock_all_vcpus(struct kvm *kvm);
-void unlock_all_vcpus(struct kvm *kvm);
-
static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu;
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 37b1340..40ba954 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -23,6 +23,7 @@
HAS_DIT
HAS_E0PD
HAS_ECV
+HAS_ECV_CNTPOFF
HAS_EPAN
HAS_GENERIC_AUTH
HAS_GENERIC_AUTH_ARCH_QARMA3
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index dd5a9c7..7063f1a 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -1952,6 +1952,10 @@
Fields CONTEXTIDR_ELx
EndSysreg
+Sysreg CNTPOFF_EL2 3 4 14 0 6
+Field 63:0 PhysicalOffset
+EndSysreg
+
Sysreg CPACR_EL12 3 5 1 0 2
Fields CPACR_ELx
EndSysreg
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 2803c9c2..957121a 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -757,7 +757,7 @@ struct kvm_mips_callbacks {
int (*vcpu_run)(struct kvm_vcpu *vcpu);
void (*vcpu_reenter)(struct kvm_vcpu *vcpu);
};
-extern struct kvm_mips_callbacks *kvm_mips_callbacks;
+extern const struct kvm_mips_callbacks * const kvm_mips_callbacks;
int kvm_mips_emulation_init(void);
/* Debug: dump vcpu state */
diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c
index dafab00..3d21cbf 100644
--- a/arch/mips/kvm/vz.c
+++ b/arch/mips/kvm/vz.c
@@ -3305,7 +3305,7 @@ static struct kvm_mips_callbacks kvm_vz_callbacks = {
};
/* FIXME: Get rid of the callbacks now that trap-and-emulate is gone. */
-struct kvm_mips_callbacks *kvm_mips_callbacks = &kvm_vz_callbacks;
+const struct kvm_mips_callbacks * const kvm_mips_callbacks = &kvm_vz_callbacks;
int kvm_mips_emulation_init(void)
{
diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
index 92a9682..365d272 100644
--- a/arch/powerpc/include/asm/kasan.h
+++ b/arch/powerpc/include/asm/kasan.h
@@ -2,7 +2,7 @@
#ifndef __ASM_KASAN_H
#define __ASM_KASAN_H
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN) && !defined(CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX)
#define _GLOBAL_KASAN(fn) _GLOBAL(__##fn)
#define _GLOBAL_TOC_KASAN(fn) _GLOBAL_TOC(__##fn)
#define EXPORT_SYMBOL_KASAN(fn) EXPORT_SYMBOL(__##fn)
diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
index 2aa0e31..60ba227 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -30,11 +30,17 @@ extern int memcmp(const void *,const void *,__kernel_size_t);
extern void * memchr(const void *,int,__kernel_size_t);
void memcpy_flushcache(void *dest, const void *src, size_t size);
+#ifdef CONFIG_KASAN
+/* __mem variants are used by KASAN to implement instrumented memintrinsics. */
+#ifdef CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX
+#define __memset memset
+#define __memcpy memcpy
+#define __memmove memmove
+#else /* CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX */
void *__memset(void *s, int c, __kernel_size_t count);
void *__memcpy(void *to, const void *from, __kernel_size_t n);
void *__memmove(void *to, const void *from, __kernel_size_t n);
-
-#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+#ifndef __SANITIZE_ADDRESS__
/*
* For files that are not instrumented (e.g. mm/slub.c) we
* should use not instrumented version of mem* functions.
@@ -46,8 +52,9 @@ void *__memmove(void *to, const void *from, __kernel_size_t n);
#ifndef __NO_FORTIFY
#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
#endif
-
-#endif
+#endif /* !__SANITIZE_ADDRESS__ */
+#endif /* CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX */
+#endif /* CONFIG_KASAN */
#ifdef CONFIG_PPC64
#ifndef CONFIG_KASAN
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index 5a31986..69623b9 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -13,8 +13,13 @@
# If you really need to reference something from prom_init.o add
# it to the list below:
-grep "^CONFIG_KASAN=y$" ${KCONFIG_CONFIG} >/dev/null
-if [ $? -eq 0 ]
+has_renamed_memintrinsics()
+{
+ grep -q "^CONFIG_KASAN=y$" ${KCONFIG_CONFIG} && \
+ ! grep -q "^CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX=y" ${KCONFIG_CONFIG}
+}
+
+if has_renamed_memintrinsics
then
MEM_FUNCS="__memcpy __memset"
else
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 2bef19c..af46aa8 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -271,11 +271,16 @@ static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma
}
/*
- * Check for a read fault. This could be caused by a read on an
- * inaccessible page (i.e. PROT_NONE), or a Radix MMU execute-only page.
+ * VM_READ, VM_WRITE and VM_EXEC all imply read permissions, as
+ * defined in protection_map[]. Read faults can only be caused by
+ * a PROT_NONE mapping, or with a PROT_EXEC-only mapping on Radix.
*/
- if (unlikely(!(vma->vm_flags & VM_READ)))
+ if (unlikely(!vma_is_accessible(vma)))
return true;
+
+ if (unlikely(radix_enabled() && ((vma->vm_flags & VM_ACCESS_FLAGS) == VM_EXEC)))
+ return true;
+
/*
* We should ideally do the vma pkey access check here. But in the
* fault path, handle_mm_fault() also does the same check. To avoid
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index b481c5c..21b22bf 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -7,6 +7,7 @@
select OF_DYNAMIC
select FORCE_PCI
select PCI_MSI
+ select GENERIC_ALLOCATOR
select PPC_XICS
select PPC_XIVE_SPAPR
select PPC_ICP_NATIVE
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index c5e42cc..5b182d1 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -464,6 +464,28 @@
depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zihintpause)
depends on LLD_VERSION >= 150000 || LD_VERSION >= 23600
+config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
+ def_bool y
+ # https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc
+ depends on AS_IS_GNU && AS_VERSION >= 23800
+ help
+ Newer binutils versions default to ISA spec version 20191213 which
+ moves some instructions from the I extension to the Zicsr and Zifencei
+ extensions.
+
+config TOOLCHAIN_NEEDS_OLD_ISA_SPEC
+ def_bool y
+ depends on TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
+ # https://github.com/llvm/llvm-project/commit/22e199e6afb1263c943c0c0d4498694e15bf8a16
+ depends on CC_IS_CLANG && CLANG_VERSION < 170000
+ help
+ Certain versions of clang do not support zicsr and zifencei via -march
+ but newer versions of binutils require it for the reasons noted in the
+ help text of CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI. This
+ option causes an older ISA spec compatible with these older versions
+ of clang to be passed to GAS, which has the same result as passing zicsr
+ and zifencei to -march.
+
config FPU
bool "FPU support"
default y
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 4de83b9..b05e833 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -57,10 +57,12 @@
riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd
riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c
-# Newer binutils versions default to ISA spec version 20191213 which moves some
-# instructions from the I extension to the Zicsr and Zifencei extensions.
-toolchain-need-zicsr-zifencei := $(call cc-option-yn, -march=$(riscv-march-y)_zicsr_zifencei)
-riscv-march-$(toolchain-need-zicsr-zifencei) := $(riscv-march-y)_zicsr_zifencei
+ifdef CONFIG_TOOLCHAIN_NEEDS_OLD_ISA_SPEC
+KBUILD_CFLAGS += -Wa,-misa-spec=2.2
+KBUILD_AFLAGS += -Wa,-misa-spec=2.2
+else
+riscv-march-$(CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI) := $(riscv-march-y)_zicsr_zifencei
+endif
# Check if the toolchain supports Zihintpause extension
riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause
diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h
index 5ff1f19..0099dc1 100644
--- a/arch/riscv/include/asm/mmu.h
+++ b/arch/riscv/include/asm/mmu.h
@@ -19,8 +19,6 @@ typedef struct {
#ifdef CONFIG_SMP
/* A local icache flush is needed before user execution can resume. */
cpumask_t icache_stale_mask;
- /* A local tlb flush is needed before user execution can resume. */
- cpumask_t tlb_stale_mask;
#endif
} mm_context_t;
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 907b9ef..a09196f 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -12,6 +12,8 @@
#include <asm/errata_list.h>
#ifdef CONFIG_MMU
+extern unsigned long asid_mask;
+
static inline void local_flush_tlb_all(void)
{
__asm__ __volatile__ ("sfence.vma" : : : "memory");
@@ -22,24 +24,6 @@ static inline void local_flush_tlb_page(unsigned long addr)
{
ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory"));
}
-
-static inline void local_flush_tlb_all_asid(unsigned long asid)
-{
- __asm__ __volatile__ ("sfence.vma x0, %0"
- :
- : "r" (asid)
- : "memory");
-}
-
-static inline void local_flush_tlb_page_asid(unsigned long addr,
- unsigned long asid)
-{
- __asm__ __volatile__ ("sfence.vma %0, %1"
- :
- : "r" (addr), "r" (asid)
- : "memory");
-}
-
#else /* CONFIG_MMU */
#define local_flush_tlb_all() do { } while (0)
#define local_flush_tlb_page(addr) do { } while (0)
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index 80ce9ca..12e22e7 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -22,7 +22,7 @@ DEFINE_STATIC_KEY_FALSE(use_asid_allocator);
static unsigned long asid_bits;
static unsigned long num_asids;
-static unsigned long asid_mask;
+unsigned long asid_mask;
static atomic_long_t current_version;
@@ -196,16 +196,6 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu)
if (need_flush_tlb)
local_flush_tlb_all();
-#ifdef CONFIG_SMP
- else {
- cpumask_t *mask = &mm->context.tlb_stale_mask;
-
- if (cpumask_test_cpu(cpu, mask)) {
- cpumask_clear_cpu(cpu, mask);
- local_flush_tlb_all_asid(cntx & asid_mask);
- }
- }
-#endif
}
static void set_mm_noasid(struct mm_struct *mm)
@@ -215,12 +205,24 @@ static void set_mm_noasid(struct mm_struct *mm)
local_flush_tlb_all();
}
-static inline void set_mm(struct mm_struct *mm, unsigned int cpu)
+static inline void set_mm(struct mm_struct *prev,
+ struct mm_struct *next, unsigned int cpu)
{
- if (static_branch_unlikely(&use_asid_allocator))
- set_mm_asid(mm, cpu);
- else
- set_mm_noasid(mm);
+ /*
+	 * The mm_cpumask indicates which harts' TLBs contain the virtual
+	 * address mapping of the mm. Compared to the noasid case, using
+	 * ASIDs can't guarantee that stale TLB entries are invalidated,
+	 * because the ASID mechanism doesn't flush the TLB on every
+	 * switch_mm (for performance). So when using ASIDs, keep every
+	 * CPU's footprint in mm_cpumask() until the mm is reset.
+ */
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+ if (static_branch_unlikely(&use_asid_allocator)) {
+ set_mm_asid(next, cpu);
+ } else {
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
+ set_mm_noasid(next);
+ }
}
static int __init asids_init(void)
@@ -274,7 +276,8 @@ static int __init asids_init(void)
}
early_initcall(asids_init);
#else
-static inline void set_mm(struct mm_struct *mm, unsigned int cpu)
+static inline void set_mm(struct mm_struct *prev,
+ struct mm_struct *next, unsigned int cpu)
{
/* Nothing to do here when there is no MMU */
}
@@ -327,10 +330,7 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
*/
cpu = smp_processor_id();
- cpumask_clear_cpu(cpu, mm_cpumask(prev));
- cpumask_set_cpu(cpu, mm_cpumask(next));
-
- set_mm(next, cpu);
+ set_mm(prev, next, cpu);
flush_icache_deferred(next, cpu);
}
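
For illustration only (not part of the patch): with the ASID allocator active, mm_cpumask(mm) now accumulates every hart that has ever run the mm, so a remote flush can simply be broadcast to that mask with the mm's current ASID. A minimal sketch, assuming the newly exported asid_mask and the SBI helper shown in the tlbflush.c hunk further below; the example_ name is invented:

	static void example_flush_mm_range_asid(struct mm_struct *mm,
						unsigned long start,
						unsigned long size)
	{
		/* Mask the version bits off the context id to get the hardware ASID. */
		unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask;

		/* Every hart recorded in mm_cpumask() may hold entries for this ASID. */
		sbi_remote_sfence_vma_asid(mm_cpumask(mm), start, size, asid);
	}
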
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 460f785..d5f3e50 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -143,6 +143,8 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
no_context(regs, addr);
return;
}
+ if (pud_leaf(*pud_k))
+ goto flush_tlb;
/*
* Since the vmalloc area is global, it is unnecessary
@@ -153,6 +155,8 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
no_context(regs, addr);
return;
}
+ if (pmd_leaf(*pmd_k))
+ goto flush_tlb;
/*
* Make sure the actual PTE exists as well to
@@ -172,6 +176,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
* ordering constraint, not a cache flush; it is
* necessary even after writing invalid entries.
*/
+flush_tlb:
local_flush_tlb_page(addr);
}
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index ce7dfc8..ef701fa 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -5,7 +5,23 @@
#include <linux/sched.h>
#include <asm/sbi.h>
#include <asm/mmu_context.h>
-#include <asm/tlbflush.h>
+
+static inline void local_flush_tlb_all_asid(unsigned long asid)
+{
+ __asm__ __volatile__ ("sfence.vma x0, %0"
+ :
+ : "r" (asid)
+ : "memory");
+}
+
+static inline void local_flush_tlb_page_asid(unsigned long addr,
+ unsigned long asid)
+{
+ __asm__ __volatile__ ("sfence.vma %0, %1"
+ :
+ : "r" (addr), "r" (asid)
+ : "memory");
+}
void flush_tlb_all(void)
{
@@ -15,7 +31,6 @@ void flush_tlb_all(void)
static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
unsigned long size, unsigned long stride)
{
- struct cpumask *pmask = &mm->context.tlb_stale_mask;
struct cpumask *cmask = mm_cpumask(mm);
unsigned int cpuid;
bool broadcast;
@@ -27,16 +42,7 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
/* check if the tlbflush needs to be sent to other CPUs */
broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
if (static_branch_unlikely(&use_asid_allocator)) {
- unsigned long asid = atomic_long_read(&mm->context.id);
-
- /*
- * TLB will be immediately flushed on harts concurrently
- * executing this MM context. TLB flush on other harts
- * is deferred until this MM context migrates there.
- */
- cpumask_setall(pmask);
- cpumask_clear_cpu(cpuid, pmask);
- cpumask_andnot(pmask, pmask, cmask);
+ unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask;
if (broadcast) {
sbi_remote_sfence_vma_asid(cmask, start, size, asid);
diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c
index 9b14045..74b5cd2 100644
--- a/arch/s390/boot/ipl_report.c
+++ b/arch/s390/boot/ipl_report.c
@@ -57,11 +57,19 @@ static unsigned long find_bootdata_space(struct ipl_rb_components *comps,
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size &&
intersects(initrd_data.start, initrd_data.size, safe_addr, size))
safe_addr = initrd_data.start + initrd_data.size;
+ if (intersects(safe_addr, size, (unsigned long)comps, comps->len)) {
+ safe_addr = (unsigned long)comps + comps->len;
+ goto repeat;
+ }
for_each_rb_entry(comp, comps)
if (intersects(safe_addr, size, comp->addr, comp->len)) {
safe_addr = comp->addr + comp->len;
goto repeat;
}
+ if (intersects(safe_addr, size, (unsigned long)certs, certs->len)) {
+ safe_addr = (unsigned long)certs + certs->len;
+ goto repeat;
+ }
for_each_rb_entry(cert, certs)
if (intersects(safe_addr, size, cert->addr, cert->len)) {
safe_addr = cert->addr + cert->len;
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 3c68fe4..4ccf66d 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -23,7 +23,6 @@
CONFIG_MEMCG=y
CONFIG_BLK_CGROUP=y
CONFIG_CFS_BANDWIDTH=y
-CONFIG_RT_GROUP_SCHED=y
CONFIG_CGROUP_PIDS=y
CONFIG_CGROUP_RDMA=y
CONFIG_CGROUP_FREEZER=y
@@ -90,7 +89,6 @@
CONFIG_SOLARIS_X86_PARTITION=y
CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_IOSCHED_BFQ=y
-CONFIG_BFQ_GROUP_IOSCHED=y
CONFIG_BINFMT_MISC=m
CONFIG_ZSWAP=y
CONFIG_ZSMALLOC_STAT=y
@@ -298,7 +296,6 @@
CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_TARGET_MASQUERADE=m
CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IP_NF_RAW=m
@@ -340,7 +337,6 @@
CONFIG_VLAN_8021Q=m
CONFIG_VLAN_8021Q_GVRP=y
CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_HTB=m
CONFIG_NET_SCH_HFSC=m
CONFIG_NET_SCH_PRIO=m
@@ -351,7 +347,6 @@
CONFIG_NET_SCH_TEQL=m
CONFIG_NET_SCH_TBF=m
CONFIG_NET_SCH_GRED=m
-CONFIG_NET_SCH_DSMARK=m
CONFIG_NET_SCH_NETEM=m
CONFIG_NET_SCH_DRR=m
CONFIG_NET_SCH_MQPRIO=m
@@ -363,14 +358,11 @@
CONFIG_NET_SCH_PLUG=m
CONFIG_NET_SCH_ETS=m
CONFIG_NET_CLS_BASIC=m
-CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_ROUTE4=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_U32=m
CONFIG_CLS_U32_PERF=y
CONFIG_CLS_U32_MARK=y
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_RSVP6=m
CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_CGROUP=y
CONFIG_NET_CLS_BPF=m
@@ -584,7 +576,7 @@
CONFIG_FB=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
-# CONFIG_HID is not set
+# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_ACCESS=m
@@ -828,6 +820,7 @@
CONFIG_DETECT_HUNG_TASK=y
CONFIG_WQ_WATCHDOG=y
CONFIG_TEST_LOCKUP=m
+CONFIG_DEBUG_PREEMPT=y
CONFIG_PROVE_LOCKING=y
CONFIG_LOCK_STAT=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
@@ -843,6 +836,7 @@
# CONFIG_RCU_TRACE is not set
CONFIG_LATENCYTOP=y
CONFIG_BOOTTIME_TRACING=y
+CONFIG_FPROBE=y
CONFIG_FUNCTION_PROFILER=y
CONFIG_STACK_TRACER=y
CONFIG_IRQSOFF_TRACER=y
@@ -857,6 +851,7 @@
CONFIG_SAMPLE_TRACE_PRINTK=m
CONFIG_SAMPLE_FTRACE_DIRECT=m
CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m
+CONFIG_SAMPLE_FTRACE_OPS=m
CONFIG_DEBUG_ENTRY=y
CONFIG_CIO_INJECT=y
CONFIG_KUNIT=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 9ab9163..693297a 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -21,7 +21,6 @@
CONFIG_MEMCG=y
CONFIG_BLK_CGROUP=y
CONFIG_CFS_BANDWIDTH=y
-CONFIG_RT_GROUP_SCHED=y
CONFIG_CGROUP_PIDS=y
CONFIG_CGROUP_RDMA=y
CONFIG_CGROUP_FREEZER=y
@@ -85,7 +84,6 @@
CONFIG_SOLARIS_X86_PARTITION=y
CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_IOSCHED_BFQ=y
-CONFIG_BFQ_GROUP_IOSCHED=y
CONFIG_BINFMT_MISC=m
CONFIG_ZSWAP=y
CONFIG_ZSMALLOC_STAT=y
@@ -289,7 +287,6 @@
CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_TARGET_MASQUERADE=m
CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IP_NF_RAW=m
@@ -330,7 +327,6 @@
CONFIG_VLAN_8021Q=m
CONFIG_VLAN_8021Q_GVRP=y
CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_HTB=m
CONFIG_NET_SCH_HFSC=m
CONFIG_NET_SCH_PRIO=m
@@ -341,7 +337,6 @@
CONFIG_NET_SCH_TEQL=m
CONFIG_NET_SCH_TBF=m
CONFIG_NET_SCH_GRED=m
-CONFIG_NET_SCH_DSMARK=m
CONFIG_NET_SCH_NETEM=m
CONFIG_NET_SCH_DRR=m
CONFIG_NET_SCH_MQPRIO=m
@@ -353,14 +348,11 @@
CONFIG_NET_SCH_PLUG=m
CONFIG_NET_SCH_ETS=m
CONFIG_NET_CLS_BASIC=m
-CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_ROUTE4=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_U32=m
CONFIG_CLS_U32_PERF=y
CONFIG_CLS_U32_MARK=y
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_RSVP6=m
CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_CGROUP=y
CONFIG_NET_CLS_BPF=m
@@ -573,7 +565,7 @@
CONFIG_FB=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
-# CONFIG_HID is not set
+# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_ACCESS=m
@@ -795,6 +787,7 @@
CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_LATENCYTOP=y
CONFIG_BOOTTIME_TRACING=y
+CONFIG_FPROBE=y
CONFIG_FUNCTION_PROFILER=y
CONFIG_STACK_TRACER=y
CONFIG_SCHED_TRACER=y
@@ -805,6 +798,7 @@
CONFIG_SAMPLE_TRACE_PRINTK=m
CONFIG_SAMPLE_FTRACE_DIRECT=m
CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m
+CONFIG_SAMPLE_FTRACE_OPS=m
CONFIG_KUNIT=m
CONFIG_KUNIT_DEBUGFS=y
CONFIG_LKDTM=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index a9c0c81..33a232b 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -58,7 +58,7 @@
# CONFIG_VMCP is not set
# CONFIG_MONWRITER is not set
# CONFIG_S390_VMUR is not set
-# CONFIG_HID is not set
+# CONFIG_HID_SUPPORT is not set
# CONFIG_VIRTIO_MENU is not set
# CONFIG_VHOST_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 9250fde..da6dac3 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -305,7 +305,7 @@ static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi)
static inline int gisa_in_alert_list(struct kvm_s390_gisa *gisa)
{
- return READ_ONCE(gisa->next_alert) != (u32)(u64)gisa;
+ return READ_ONCE(gisa->next_alert) != (u32)virt_to_phys(gisa);
}
static inline void gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
@@ -3168,7 +3168,7 @@ void kvm_s390_gisa_init(struct kvm *kvm)
hrtimer_init(&gi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
gi->timer.function = gisa_vcpu_kicker;
memset(gi->origin, 0, sizeof(struct kvm_s390_gisa));
- gi->origin->next_alert = (u32)(u64)gi->origin;
+ gi->origin->next_alert = (u32)virt_to_phys(gi->origin);
VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin);
}
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index b124d58..7dab00f 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -112,7 +112,7 @@ static int zpci_reset_aipb(u8 nisc)
return -EINVAL;
aift->sbv = zpci_aif_sbv;
- aift->gait = (struct zpci_gaite *)zpci_aipb->aipb.gait;
+ aift->gait = phys_to_virt(zpci_aipb->aipb.gait);
return 0;
}
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index b6a0219..8d6b765a 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -138,11 +138,15 @@ static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
}
/* Copy to APCB FORMAT1 from APCB FORMAT0 */
static int setup_apcb10(struct kvm_vcpu *vcpu, struct kvm_s390_apcb1 *apcb_s,
- unsigned long apcb_o, struct kvm_s390_apcb1 *apcb_h)
+ unsigned long crycb_gpa, struct kvm_s390_apcb1 *apcb_h)
{
struct kvm_s390_apcb0 tmp;
+ unsigned long apcb_gpa;
- if (read_guest_real(vcpu, apcb_o, &tmp, sizeof(struct kvm_s390_apcb0)))
+ apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb0);
+
+ if (read_guest_real(vcpu, apcb_gpa, &tmp,
+ sizeof(struct kvm_s390_apcb0)))
return -EFAULT;
apcb_s->apm[0] = apcb_h->apm[0] & tmp.apm[0];
@@ -157,15 +161,19 @@ static int setup_apcb10(struct kvm_vcpu *vcpu, struct kvm_s390_apcb1 *apcb_s,
* setup_apcb00 - Copy to APCB FORMAT0 from APCB FORMAT0
* @vcpu: pointer to the virtual CPU
* @apcb_s: pointer to start of apcb in the shadow crycb
- * @apcb_o: pointer to start of original apcb in the guest2
+ * @crycb_gpa: guest physical address to start of original guest crycb
* @apcb_h: pointer to start of apcb in the guest1
*
* Returns 0 and -EFAULT on error reading guest apcb
*/
static int setup_apcb00(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
- unsigned long apcb_o, unsigned long *apcb_h)
+ unsigned long crycb_gpa, unsigned long *apcb_h)
{
- if (read_guest_real(vcpu, apcb_o, apcb_s,
+ unsigned long apcb_gpa;
+
+ apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb0);
+
+ if (read_guest_real(vcpu, apcb_gpa, apcb_s,
sizeof(struct kvm_s390_apcb0)))
return -EFAULT;
@@ -178,16 +186,20 @@ static int setup_apcb00(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
* setup_apcb11 - Copy the FORMAT1 APCB from the guest to the shadow CRYCB
* @vcpu: pointer to the virtual CPU
* @apcb_s: pointer to start of apcb in the shadow crycb
- * @apcb_o: pointer to start of original guest apcb
+ * @crycb_gpa: guest physical address to start of original guest crycb
* @apcb_h: pointer to start of apcb in the host
*
* Returns 0 and -EFAULT on error reading guest apcb
*/
static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
- unsigned long apcb_o,
+ unsigned long crycb_gpa,
unsigned long *apcb_h)
{
- if (read_guest_real(vcpu, apcb_o, apcb_s,
+ unsigned long apcb_gpa;
+
+ apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb1);
+
+ if (read_guest_real(vcpu, apcb_gpa, apcb_s,
sizeof(struct kvm_s390_apcb1)))
return -EFAULT;
@@ -200,7 +212,7 @@ static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
* setup_apcb - Create a shadow copy of the apcb.
* @vcpu: pointer to the virtual CPU
* @crycb_s: pointer to shadow crycb
- * @crycb_o: pointer to original guest crycb
+ * @crycb_gpa: guest physical address of original guest crycb
* @crycb_h: pointer to the host crycb
* @fmt_o: format of the original guest crycb.
* @fmt_h: format of the host crycb.
@@ -211,50 +223,46 @@ static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
* Return 0 or an error number if the guest and host crycb are incompatible.
*/
static int setup_apcb(struct kvm_vcpu *vcpu, struct kvm_s390_crypto_cb *crycb_s,
- const u32 crycb_o,
+ const u32 crycb_gpa,
struct kvm_s390_crypto_cb *crycb_h,
int fmt_o, int fmt_h)
{
- struct kvm_s390_crypto_cb *crycb;
-
- crycb = (struct kvm_s390_crypto_cb *) (unsigned long)crycb_o;
-
switch (fmt_o) {
case CRYCB_FORMAT2:
- if ((crycb_o & PAGE_MASK) != ((crycb_o + 256) & PAGE_MASK))
+ if ((crycb_gpa & PAGE_MASK) != ((crycb_gpa + 256) & PAGE_MASK))
return -EACCES;
if (fmt_h != CRYCB_FORMAT2)
return -EINVAL;
return setup_apcb11(vcpu, (unsigned long *)&crycb_s->apcb1,
- (unsigned long) &crycb->apcb1,
+ crycb_gpa,
(unsigned long *)&crycb_h->apcb1);
case CRYCB_FORMAT1:
switch (fmt_h) {
case CRYCB_FORMAT2:
return setup_apcb10(vcpu, &crycb_s->apcb1,
- (unsigned long) &crycb->apcb0,
+ crycb_gpa,
&crycb_h->apcb1);
case CRYCB_FORMAT1:
return setup_apcb00(vcpu,
(unsigned long *) &crycb_s->apcb0,
- (unsigned long) &crycb->apcb0,
+ crycb_gpa,
(unsigned long *) &crycb_h->apcb0);
}
break;
case CRYCB_FORMAT0:
- if ((crycb_o & PAGE_MASK) != ((crycb_o + 32) & PAGE_MASK))
+ if ((crycb_gpa & PAGE_MASK) != ((crycb_gpa + 32) & PAGE_MASK))
return -EACCES;
switch (fmt_h) {
case CRYCB_FORMAT2:
return setup_apcb10(vcpu, &crycb_s->apcb1,
- (unsigned long) &crycb->apcb0,
+ crycb_gpa,
&crycb_h->apcb1);
case CRYCB_FORMAT1:
case CRYCB_FORMAT0:
return setup_apcb00(vcpu,
(unsigned long *) &crycb_s->apcb0,
- (unsigned long) &crycb->apcb0,
+ crycb_gpa,
(unsigned long *) &crycb_h->apcb0);
}
}
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index ef38b15..e16afac 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -544,8 +544,7 @@ static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
return r;
}
-int zpci_setup_bus_resources(struct zpci_dev *zdev,
- struct list_head *resources)
+int zpci_setup_bus_resources(struct zpci_dev *zdev)
{
unsigned long addr, size, flags;
struct resource *res;
@@ -581,7 +580,6 @@ int zpci_setup_bus_resources(struct zpci_dev *zdev,
return -ENOMEM;
}
zdev->bars[i].res = res;
- pci_add_resource(resources, res);
}
zdev->has_resources = 1;
@@ -590,17 +588,23 @@ int zpci_setup_bus_resources(struct zpci_dev *zdev,
static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
{
+ struct resource *res;
int i;
+ pci_lock_rescan_remove();
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
- if (!zdev->bars[i].size || !zdev->bars[i].res)
+ res = zdev->bars[i].res;
+ if (!res)
continue;
+ release_resource(res);
+ pci_bus_remove_resource(zdev->zbus->bus, res);
zpci_free_iomap(zdev, zdev->bars[i].map_idx);
- release_resource(zdev->bars[i].res);
- kfree(zdev->bars[i].res);
+ zdev->bars[i].res = NULL;
+ kfree(res);
}
zdev->has_resources = 0;
+ pci_unlock_rescan_remove();
}
int pcibios_device_add(struct pci_dev *pdev)
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index 6a8da1b..a99926a 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -41,9 +41,7 @@ static int zpci_nb_devices;
*/
static int zpci_bus_prepare_device(struct zpci_dev *zdev)
{
- struct resource_entry *window, *n;
- struct resource *res;
- int rc;
+ int rc, i;
if (!zdev_enabled(zdev)) {
rc = zpci_enable_device(zdev);
@@ -57,10 +55,10 @@ static int zpci_bus_prepare_device(struct zpci_dev *zdev)
}
if (!zdev->has_resources) {
- zpci_setup_bus_resources(zdev, &zdev->zbus->resources);
- resource_list_for_each_entry_safe(window, n, &zdev->zbus->resources) {
- res = window->res;
- pci_bus_add_resource(zdev->zbus->bus, res, 0);
+ zpci_setup_bus_resources(zdev);
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+ if (zdev->bars[i].res)
+ pci_bus_add_resource(zdev->zbus->bus, zdev->bars[i].res, 0);
}
}
diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
index e96c986..af9f0ac 100644
--- a/arch/s390/pci/pci_bus.h
+++ b/arch/s390/pci/pci_bus.h
@@ -30,8 +30,7 @@ static inline void zpci_zdev_get(struct zpci_dev *zdev)
int zpci_alloc_domain(int domain);
void zpci_free_domain(int domain);
-int zpci_setup_bus_resources(struct zpci_dev *zdev,
- struct list_head *resources);
+int zpci_setup_bus_resources(struct zpci_dev *zdev);
static inline struct zpci_dev *zdev_from_bus(struct pci_bus *bus,
unsigned int devfn)
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 8c45b19..bccea57 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -923,6 +923,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
/* Event overflow */
handled++;
+ status &= ~mask;
perf_sample_data_init(&data, 0, hwc->last_period);
if (!x86_perf_event_set_period(event))
@@ -933,8 +934,6 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
-
- status &= ~mask;
}
/*
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 73c9672..97327a1 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -226,10 +226,9 @@
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
-#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
-#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
-#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
-#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
+#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* Intel FlexPriority */
+#define X86_FEATURE_EPT ( 8*32+ 2) /* Intel Extended Page Table */
+#define X86_FEATURE_VPID ( 8*32+ 3) /* Intel Virtual Processor ID */
#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */
#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
@@ -337,6 +336,7 @@
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
+#define X86_FEATURE_AMD_PSFD (13*32+28) /* "" Predictive Store Forwarding Disable */
#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */
@@ -369,6 +369,7 @@
#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
#define X86_FEATURE_X2AVIC (15*32+18) /* Virtual x2apic */
#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */
+#define X86_FEATURE_VNMI (15*32+25) /* Virtual NMI */
#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 8dc345c..13bc212 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -54,8 +54,8 @@ KVM_X86_OP(set_rflags)
KVM_X86_OP(get_if_flag)
KVM_X86_OP(flush_tlb_all)
KVM_X86_OP(flush_tlb_current)
-KVM_X86_OP_OPTIONAL(tlb_remote_flush)
-KVM_X86_OP_OPTIONAL(tlb_remote_flush_with_range)
+KVM_X86_OP_OPTIONAL(flush_remote_tlbs)
+KVM_X86_OP_OPTIONAL(flush_remote_tlbs_range)
KVM_X86_OP(flush_tlb_gva)
KVM_X86_OP(flush_tlb_guest)
KVM_X86_OP(vcpu_pre_run)
@@ -68,6 +68,8 @@ KVM_X86_OP(get_interrupt_shadow)
KVM_X86_OP(patch_hypercall)
KVM_X86_OP(inject_irq)
KVM_X86_OP(inject_nmi)
+KVM_X86_OP_OPTIONAL_RET0(is_vnmi_pending)
+KVM_X86_OP_OPTIONAL_RET0(set_vnmi_pending)
KVM_X86_OP(inject_exception)
KVM_X86_OP(cancel_injection)
KVM_X86_OP(interrupt_allowed)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a45de11..fb9d1f2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -420,6 +420,10 @@ struct kvm_mmu_root_info {
#define KVM_MMU_NUM_PREV_ROOTS 3
+#define KVM_MMU_ROOT_CURRENT BIT(0)
+#define KVM_MMU_ROOT_PREVIOUS(i) BIT(1+i)
+#define KVM_MMU_ROOTS_ALL (BIT(1 + KVM_MMU_NUM_PREV_ROOTS) - 1)
+
#define KVM_HAVE_MMU_RWLOCK
struct kvm_mmu_page;
@@ -439,9 +443,8 @@ struct kvm_mmu {
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
gpa_t gva_or_gpa, u64 access,
struct x86_exception *exception);
- int (*sync_page)(struct kvm_vcpu *vcpu,
- struct kvm_mmu_page *sp);
- void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa);
+ int (*sync_spte)(struct kvm_vcpu *vcpu,
+ struct kvm_mmu_page *sp, int i);
struct kvm_mmu_root_info root;
union kvm_cpu_role cpu_role;
union kvm_mmu_page_role root_role;
@@ -479,11 +482,6 @@ struct kvm_mmu {
u64 pdptrs[4]; /* pae */
};
-struct kvm_tlb_range {
- u64 start_gfn;
- u64 pages;
-};
-
enum pmc_type {
KVM_PMC_GP = 0,
KVM_PMC_FIXED,
@@ -515,6 +513,7 @@ struct kvm_pmc {
#define MSR_ARCH_PERFMON_FIXED_CTR_MAX (MSR_ARCH_PERFMON_FIXED_CTR0 + KVM_PMC_MAX_FIXED - 1)
#define KVM_AMD_PMC_MAX_GENERIC 6
struct kvm_pmu {
+ u8 version;
unsigned nr_arch_gp_counters;
unsigned nr_arch_fixed_counters;
unsigned available_event_types;
@@ -527,7 +526,6 @@ struct kvm_pmu {
u64 global_ovf_ctrl_mask;
u64 reserved_bits;
u64 raw_event_mask;
- u8 version;
struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
struct irq_work irq_work;
@@ -876,7 +874,8 @@ struct kvm_vcpu_arch {
u64 tsc_scaling_ratio; /* current scaling ratio */
atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
- unsigned nmi_pending; /* NMI queued after currently running handler */
+ /* Number of NMIs pending injection, not including hardware vNMIs. */
+ unsigned int nmi_pending;
bool nmi_injected; /* Trying to inject an NMI this entry */
bool smi_pending; /* SMI queued after currently running handler */
u8 handling_intr_from_guest;
@@ -1585,9 +1584,9 @@ struct kvm_x86_ops {
void (*flush_tlb_all)(struct kvm_vcpu *vcpu);
void (*flush_tlb_current)(struct kvm_vcpu *vcpu);
- int (*tlb_remote_flush)(struct kvm *kvm);
- int (*tlb_remote_flush_with_range)(struct kvm *kvm,
- struct kvm_tlb_range *range);
+ int (*flush_remote_tlbs)(struct kvm *kvm);
+ int (*flush_remote_tlbs_range)(struct kvm *kvm, gfn_t gfn,
+ gfn_t nr_pages);
/*
* Flush any TLB entries associated with the given GVA.
@@ -1621,6 +1620,13 @@ struct kvm_x86_ops {
int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
+ /* Whether or not a virtual NMI is pending in hardware. */
+ bool (*is_vnmi_pending)(struct kvm_vcpu *vcpu);
+ /*
+	 * Attempt to pend a virtual NMI in hardware.  Returns %true on success
+ * to allow using static_call_ret0 as the fallback.
+ */
+ bool (*set_vnmi_pending)(struct kvm_vcpu *vcpu);
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
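
For orientation, a hedged sketch of how a vendor implementation might back these two hooks on SVM parts with vNMI support. The real callbacks live in svm.c, take a struct kvm_vcpu and convert it with to_svm(), and perform additional enable/blocking checks; the example_ names are invented, and V_NMI_PENDING_MASK is defined in the svm.h hunk below:

	/* Sketch: vNMI pending state is a bit in the VMCB's int_ctl field. */
	static bool example_is_vnmi_pending(struct vcpu_svm *svm)
	{
		return !!(svm->vmcb->control.int_ctl & V_NMI_PENDING_MASK);
	}

	static bool example_set_vnmi_pending(struct vcpu_svm *svm)
	{
		/* Illustrative: hardware tracks only one pending vNMI at a time. */
		if (svm->vmcb->control.int_ctl & V_NMI_PENDING_MASK)
			return false;

		svm->vmcb->control.int_ctl |= V_NMI_PENDING_MASK;
		vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
		return true;
	}
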
@@ -1791,8 +1797,8 @@ void kvm_arch_free_vm(struct kvm *kvm);
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
{
- if (kvm_x86_ops.tlb_remote_flush &&
- !static_call(kvm_x86_tlb_remote_flush)(kvm))
+ if (kvm_x86_ops.flush_remote_tlbs &&
+ !static_call(kvm_x86_flush_remote_tlbs)(kvm))
return 0;
else
return -ENOTSUPP;
@@ -1997,14 +2003,11 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state,
return !!(*irq_state);
}
-#define KVM_MMU_ROOT_CURRENT BIT(0)
-#define KVM_MMU_ROOT_PREVIOUS(i) BIT(1+i)
-#define KVM_MMU_ROOTS_ALL (~0UL)
-
int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
void kvm_inject_nmi(struct kvm_vcpu *vcpu);
+int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu);
void kvm_update_dr7(struct kvm_vcpu *vcpu);
@@ -2044,8 +2047,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
void *insn, int insn_len);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
-void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
- gva_t gva, hpa_t root_hpa);
+void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ u64 addr, unsigned long roots);
void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd);
@@ -2207,4 +2210,11 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \
KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS)
+/*
+ * KVM previously used a u32 field in kvm_run to indicate the hypercall was
+ * initiated from long mode. KVM now sets bit 0 to indicate long mode, but the
+ * remaining 31 lower bits must be 0 to preserve ABI.
+ */
+#define KVM_EXIT_HYPERCALL_MBZ GENMASK_ULL(31, 1)
+
#endif /* _ASM_X86_KVM_HOST_H */
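
To make the MBZ rule above concrete, here is a minimal sketch of how the exit's flag word might be populated. It assumes the KVM_EXIT_HYPERCALL_LONG_MODE flag and the run->hypercall.flags field added elsewhere in this series, and the existing is_64_bit_hypercall() helper; it is not the literal upstream code:

	static void example_fill_hypercall_flags(struct kvm_vcpu *vcpu)
	{
		u64 flags = 0;

		if (is_64_bit_hypercall(vcpu))
			flags |= KVM_EXIT_HYPERCALL_LONG_MODE;

		/* Bits 31:1 must remain clear to preserve the old 'longmode' ABI. */
		WARN_ON_ONCE(flags & KVM_EXIT_HYPERCALL_MBZ);
		vcpu->run->hypercall.flags = flags;
	}
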
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index b8357d6..b63be69 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -128,8 +128,9 @@ struct snp_psc_desc {
struct psc_entry entries[VMGEXIT_PSC_MAX_ENTRY];
} __packed;
-/* Guest message request error code */
+/* Guest message request error codes */
#define SNP_GUEST_REQ_INVALID_LEN BIT_ULL(32)
+#define SNP_GUEST_REQ_ERR_BUSY BIT_ULL(33)
#define GHCB_MSR_TERM_REQ 0x100
#define GHCB_MSR_TERM_REASON_SET_POS 12
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 770dcf7..e7c7379d 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -183,6 +183,12 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define V_GIF_SHIFT 9
#define V_GIF_MASK (1 << V_GIF_SHIFT)
+#define V_NMI_PENDING_SHIFT 11
+#define V_NMI_PENDING_MASK (1 << V_NMI_PENDING_SHIFT)
+
+#define V_NMI_BLOCKING_SHIFT 12
+#define V_NMI_BLOCKING_MASK (1 << V_NMI_BLOCKING_SHIFT)
+
#define V_INTR_PRIO_SHIFT 16
#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
@@ -197,6 +203,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define V_GIF_ENABLE_SHIFT 25
#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT)
+#define V_NMI_ENABLE_SHIFT 26
+#define V_NMI_ENABLE_MASK (1 << V_NMI_ENABLE_SHIFT)
+
#define AVIC_ENABLE_SHIFT 31
#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
@@ -278,7 +287,6 @@ static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_
static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID);
#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
-#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
struct vmcb_seg {
diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h
index 6daa9b0..a3c29b1 100644
--- a/arch/x86/include/asm/xen/cpuid.h
+++ b/arch/x86/include/asm/xen/cpuid.h
@@ -89,11 +89,21 @@
* Sub-leaf 2: EAX: host tsc frequency in kHz
*/
+#define XEN_CPUID_TSC_EMULATED (1u << 0)
+#define XEN_CPUID_HOST_TSC_RELIABLE (1u << 1)
+#define XEN_CPUID_RDTSCP_INSTR_AVAIL (1u << 2)
+
+#define XEN_CPUID_TSC_MODE_DEFAULT (0)
+#define XEN_CPUID_TSC_MODE_ALWAYS_EMULATE (1u)
+#define XEN_CPUID_TSC_MODE_NEVER_EMULATE (2u)
+#define XEN_CPUID_TSC_MODE_PVRDTSCP (3u)
+
/*
* Leaf 5 (0x40000x04)
* HVM-specific features
* Sub-leaf 0: EAX: Features
* Sub-leaf 0: EBX: vcpu id (iff EAX has XEN_HVM_CPUID_VCPU_ID_PRESENT flag)
+ * Sub-leaf 0: ECX: domain id (iff EAX has XEN_HVM_CPUID_DOMID_PRESENT flag)
*/
#define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0) /* Virtualized APIC registers */
#define XEN_HVM_CPUID_X2APIC_VIRT (1u << 1) /* Virtualized x2APIC accesses */
@@ -102,12 +112,16 @@
#define XEN_HVM_CPUID_VCPU_ID_PRESENT (1u << 3) /* vcpu id is present in EBX */
#define XEN_HVM_CPUID_DOMID_PRESENT (1u << 4) /* domid is present in ECX */
/*
- * Bits 55:49 from the IO-APIC RTE and bits 11:5 from the MSI address can be
- * used to store high bits for the Destination ID. This expands the Destination
- * ID field from 8 to 15 bits, allowing to target APIC IDs up 32768.
+ * With the interrupt format set to 0 (non-remappable), bits 55:49 of the
+ * IO-APIC RTE and bits 11:5 of the MSI address can be used to store the
+ * high bits of the Destination ID. This expands the Destination ID field
+ * from 8 to 15 bits, allowing APIC IDs up to 32768 to be targeted.
*/
#define XEN_HVM_CPUID_EXT_DEST_ID (1u << 5)
-/* Per-vCPU event channel upcalls */
+/*
+ * Per-vCPU event channel upcalls work correctly with physical IRQs
+ * bound to event channels.
+ */
#define XEN_HVM_CPUID_UPCALL_VECTOR (1u << 6)
/*
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 7f467fe..1a6a1f9 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -559,4 +559,7 @@ struct kvm_pmu_event_filter {
#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */
#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */
+/* x86-specific KVM_EXIT_HYPERCALL flags. */
+#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0)
+
#endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 7832a69..2eec60f 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -2355,6 +2355,7 @@ static void mce_restart(void)
{
mce_timer_delete_all();
on_each_cpu(mce_cpu_restart, NULL, 1);
+ mce_schedule_work();
}
/* Toggle features for corrected errors */
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index eb07d44..b44c487 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -368,7 +368,6 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
{
struct resctrl_schema *s;
struct rdtgroup *rdtgrp;
- struct rdt_domain *dom;
struct rdt_resource *r;
char *tok, *resname;
int ret = 0;
@@ -397,10 +396,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
goto out;
}
- list_for_each_entry(s, &resctrl_schema_all, list) {
- list_for_each_entry(dom, &s->res->domains, list)
- memset(dom->staged_config, 0, sizeof(dom->staged_config));
- }
+ rdt_staged_configs_clear();
while ((tok = strsep(&buf, "\n")) != NULL) {
resname = strim(strsep(&tok, ":"));
@@ -445,6 +441,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
}
out:
+ rdt_staged_configs_clear();
rdtgroup_kn_unlock(of->kn);
cpus_read_unlock();
return ret ?: nbytes;
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 8edecc5..85ceaf9 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -555,5 +555,6 @@ void __check_limbo(struct rdt_domain *d, bool force_free);
void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
void __init thread_throttle_mode_init(void);
void __init mbm_config_rftype_init(const char *config);
+void rdt_staged_configs_clear(void);
#endif /* _ASM_X86_RESCTRL_INTERNAL_H */
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 884b6e9..6ad33f3 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -78,6 +78,19 @@ void rdt_last_cmd_printf(const char *fmt, ...)
va_end(ap);
}
+void rdt_staged_configs_clear(void)
+{
+ struct rdt_resource *r;
+ struct rdt_domain *dom;
+
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ for_each_alloc_capable_rdt_resource(r) {
+ list_for_each_entry(dom, &r->domains, list)
+ memset(dom->staged_config, 0, sizeof(dom->staged_config));
+ }
+}
+
/*
* Trivial allocator for CLOSIDs. Since h/w only supports a small number,
* we can keep a bitmap of free CLOSIDs in a single integer.
@@ -3107,7 +3120,9 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
{
struct resctrl_schema *s;
struct rdt_resource *r;
- int ret;
+ int ret = 0;
+
+ rdt_staged_configs_clear();
list_for_each_entry(s, &resctrl_schema_all, list) {
r = s->res;
@@ -3119,20 +3134,22 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
} else {
ret = rdtgroup_init_cat(s, rdtgrp->closid);
if (ret < 0)
- return ret;
+ goto out;
}
ret = resctrl_arch_update_domains(r, rdtgrp->closid);
if (ret < 0) {
rdt_last_cmd_puts("Failed to initialize allocations\n");
- return ret;
+ goto out;
}
}
rdtgrp->mode = RDT_MODE_SHAREABLE;
- return 0;
+out:
+ rdt_staged_configs_clear();
+ return ret;
}
static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 714166c..0bab497 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1118,21 +1118,20 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
zerofrom = offsetof(struct xregs_state, extended_state_area);
/*
- * The ptrace buffer is in non-compacted XSAVE format. In
- * non-compacted format disabled features still occupy state space,
- * but there is no state to copy from in the compacted
- * init_fpstate. The gap tracking will zero these states.
+ * This 'mask' indicates which states to copy from fpstate.
+ * Those extended states that are not present in fpstate are
+ * either disabled or initialized:
+ *
+ * In non-compacted format, disabled features still occupy
+ * state space but there is no state to copy from in the
+ * compacted init_fpstate. The gap tracking will zero these
+ * states.
+ *
+ * The extended features have an all zeroes init state. Thus,
+ * remove them from 'mask' to zero those features in the user
+ * buffer instead of retrieving them from init_fpstate.
*/
- mask = fpstate->user_xfeatures;
-
- /*
- * Dynamic features are not present in init_fpstate. When they are
- * in an all zeros init state, remove those from 'mask' to zero
- * those features in the user buffer instead of retrieving them
- * from init_fpstate.
- */
- if (fpu_state_size_dynamic())
- mask &= (header.xfeatures | xinit->header.xcomp_bv);
+ mask = header.xfeatures;
for_each_extended_xfeature(i, mask) {
/*
@@ -1151,9 +1150,8 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
pkru.pkru = pkru_val;
membuf_write(&to, &pkru, sizeof(pkru));
} else {
- copy_feature(header.xfeatures & BIT_ULL(i), &to,
+ membuf_write(&to,
__raw_xsave_addr(xsave, i),
- __raw_xsave_addr(xinit, i),
xstate_sizes[i]);
}
/*
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 1265ad5..fb4f1e0 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -136,10 +136,12 @@
RET
SYM_FUNC_END(ftrace_stub)
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
SYM_TYPED_FUNC_START(ftrace_stub_graph)
CALL_DEPTH_ACCOUNT
RET
SYM_FUNC_END(ftrace_stub_graph)
+#endif
#ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 679026a..3f664ab 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -2183,9 +2183,6 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned
struct ghcb *ghcb;
int ret;
- if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
- return -ENODEV;
-
if (!fw_err)
return -EINVAL;
@@ -2212,15 +2209,26 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned
if (ret)
goto e_put;
- if (ghcb->save.sw_exit_info_2) {
+ *fw_err = ghcb->save.sw_exit_info_2;
+ switch (*fw_err) {
+ case 0:
+ break;
+
+ case SNP_GUEST_REQ_ERR_BUSY:
+ ret = -EAGAIN;
+ break;
+
+ case SNP_GUEST_REQ_INVALID_LEN:
/* Number of expected pages are returned in RBX */
- if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST &&
- ghcb->save.sw_exit_info_2 == SNP_GUEST_REQ_INVALID_LEN)
+ if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
input->data_npages = ghcb_get_rbx(ghcb);
-
- *fw_err = ghcb->save.sw_exit_info_2;
-
+ ret = -ENOSPC;
+ break;
+ }
+ fallthrough;
+ default:
ret = -EIO;
+ break;
}
e_put:
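
For context, a hedged caller-side sketch showing how the new error mapping could be consumed. The retry policy and example_resize_cert_buffer() are illustrative, not taken from this patch; the real consumer is the sev-guest driver, which also bounds its retries:

	int example_issue_with_retry(u64 exit_code, struct snp_req_data *input,
				     unsigned long *fw_err)
	{
		int ret;

		do {
			ret = snp_issue_guest_request(exit_code, input, fw_err);

			if (ret == -ENOSPC) {
				/* input->data_npages now holds the required size. */
				example_resize_cert_buffer(input);	/* hypothetical helper */
				ret = -EAGAIN;
			}
		} while (ret == -EAGAIN);	/* firmware busy, or buffer resized */

		return ret;			/* 0, or -EIO/-EINVAL on hard failure */
	}
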
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 9583a11..123bf8b 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -60,12 +60,6 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted)
return ret;
}
-/*
- * This one is tied to SSB in the user API, and not
- * visible in /proc/cpuinfo.
- */
-#define KVM_X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */
-
#define F feature_bit
/* Scattered Flag - For features that are scattered by cpufeatures.h. */
@@ -266,7 +260,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
/* Update OSXSAVE bit */
if (boot_cpu_has(X86_FEATURE_XSAVE))
cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
- kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE));
+ kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
cpuid_entry_change(best, X86_FEATURE_APIC,
vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
@@ -275,7 +269,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
best = cpuid_entry2_find(entries, nent, 7, 0);
if (best && boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7)
cpuid_entry_change(best, X86_FEATURE_OSPKE,
- kvm_read_cr4_bits(vcpu, X86_CR4_PKE));
+ kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
best = cpuid_entry2_find(entries, nent, 0xD, 0);
if (best)
@@ -420,7 +414,7 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
* KVM_SET_CPUID{,2} again. To support this legacy behavior, check
* whether the supplied CPUID data is equal to what's already set.
*/
- if (vcpu->arch.last_vmentry_cpu != -1) {
+ if (kvm_vcpu_has_run(vcpu)) {
r = kvm_cpuid_check_equal(vcpu, e2, nent);
if (r)
return r;
@@ -715,7 +709,7 @@ void kvm_set_cpu_caps(void)
F(CLZERO) | F(XSAVEERPTR) |
F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON) |
- __feature_bit(KVM_X86_FEATURE_AMD_PSFD)
+ F(AMD_PSFD)
);
/*
@@ -1002,7 +996,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->eax = entry->ebx = entry->ecx = 0;
break;
case 0xd: {
- u64 permitted_xcr0 = kvm_caps.supported_xcr0 & xstate_get_guest_group_perm();
+ u64 permitted_xcr0 = kvm_get_filtered_xcr0();
u64 permitted_xss = kvm_caps.supported_xss;
entry->eax &= permitted_xcr0;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a20bec9..936a397 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1640,6 +1640,14 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
goto exception;
break;
case VCPU_SREG_CS:
+ /*
+ * KVM uses "none" when loading CS as part of emulating Real
+ * Mode exceptions and IRET (handled above). In all other
+ * cases, loading CS without a control transfer is a KVM bug.
+ */
+ if (WARN_ON_ONCE(transfer == X86_TRANSFER_NONE))
+ goto exception;
+
if (!(seg_desc.type & 8))
goto exception;
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 4c91f62..75eae9c 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -4,7 +4,7 @@
#include <linux/kvm_host.h>
-#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
+#define KVM_POSSIBLE_CR0_GUEST_BITS (X86_CR0_TS | X86_CR0_WP)
#define KVM_POSSIBLE_CR4_GUEST_BITS \
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
| X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE)
@@ -157,6 +157,14 @@ static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
return vcpu->arch.cr0 & mask;
}
+static __always_inline bool kvm_is_cr0_bit_set(struct kvm_vcpu *vcpu,
+ unsigned long cr0_bit)
+{
+ BUILD_BUG_ON(!is_power_of_2(cr0_bit));
+
+ return !!kvm_read_cr0_bits(vcpu, cr0_bit);
+}
+
static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu)
{
return kvm_read_cr0_bits(vcpu, ~0UL);
@@ -171,6 +179,14 @@ static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask)
return vcpu->arch.cr4 & mask;
}
+static __always_inline bool kvm_is_cr4_bit_set(struct kvm_vcpu *vcpu,
+ unsigned long cr4_bit)
+{
+ BUILD_BUG_ON(!is_power_of_2(cr4_bit));
+
+ return !!kvm_read_cr4_bits(vcpu, cr4_bit);
+}
+
static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu)
{
if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
diff --git a/arch/x86/kvm/kvm_onhyperv.c b/arch/x86/kvm/kvm_onhyperv.c
index 482d663..ded0bd6 100644
--- a/arch/x86/kvm/kvm_onhyperv.c
+++ b/arch/x86/kvm/kvm_onhyperv.c
@@ -10,17 +10,22 @@
#include "hyperv.h"
#include "kvm_onhyperv.h"
+struct kvm_hv_tlb_range {
+ u64 start_gfn;
+ u64 pages;
+};
+
static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush,
void *data)
{
- struct kvm_tlb_range *range = data;
+ struct kvm_hv_tlb_range *range = data;
return hyperv_fill_flush_guest_mapping_list(flush, range->start_gfn,
range->pages);
}
static inline int hv_remote_flush_root_tdp(hpa_t root_tdp,
- struct kvm_tlb_range *range)
+ struct kvm_hv_tlb_range *range)
{
if (range)
return hyperv_flush_guest_mapping_range(root_tdp,
@@ -29,8 +34,8 @@ static inline int hv_remote_flush_root_tdp(hpa_t root_tdp,
return hyperv_flush_guest_mapping(root_tdp);
}
-int hv_remote_flush_tlb_with_range(struct kvm *kvm,
- struct kvm_tlb_range *range)
+static int __hv_flush_remote_tlbs_range(struct kvm *kvm,
+ struct kvm_hv_tlb_range *range)
{
struct kvm_arch *kvm_arch = &kvm->arch;
struct kvm_vcpu *vcpu;
@@ -86,19 +91,29 @@ int hv_remote_flush_tlb_with_range(struct kvm *kvm,
spin_unlock(&kvm_arch->hv_root_tdp_lock);
return ret;
}
-EXPORT_SYMBOL_GPL(hv_remote_flush_tlb_with_range);
-int hv_remote_flush_tlb(struct kvm *kvm)
+int hv_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn, gfn_t nr_pages)
{
- return hv_remote_flush_tlb_with_range(kvm, NULL);
+ struct kvm_hv_tlb_range range = {
+ .start_gfn = start_gfn,
+ .pages = nr_pages,
+ };
+
+ return __hv_flush_remote_tlbs_range(kvm, &range);
}
-EXPORT_SYMBOL_GPL(hv_remote_flush_tlb);
+EXPORT_SYMBOL_GPL(hv_flush_remote_tlbs_range);
+
+int hv_flush_remote_tlbs(struct kvm *kvm)
+{
+ return __hv_flush_remote_tlbs_range(kvm, NULL);
+}
+EXPORT_SYMBOL_GPL(hv_flush_remote_tlbs);
void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp)
{
struct kvm_arch *kvm_arch = &vcpu->kvm->arch;
- if (kvm_x86_ops.tlb_remote_flush == hv_remote_flush_tlb) {
+ if (kvm_x86_ops.flush_remote_tlbs == hv_flush_remote_tlbs) {
spin_lock(&kvm_arch->hv_root_tdp_lock);
vcpu->arch.hv_root_tdp = root_tdp;
if (root_tdp != kvm_arch->hv_root_tdp)
diff --git a/arch/x86/kvm/kvm_onhyperv.h b/arch/x86/kvm/kvm_onhyperv.h
index 287e98e..ff127d3 100644
--- a/arch/x86/kvm/kvm_onhyperv.h
+++ b/arch/x86/kvm/kvm_onhyperv.h
@@ -7,9 +7,8 @@
#define __ARCH_X86_KVM_KVM_ONHYPERV_H__
#if IS_ENABLED(CONFIG_HYPERV)
-int hv_remote_flush_tlb_with_range(struct kvm *kvm,
- struct kvm_tlb_range *range);
-int hv_remote_flush_tlb(struct kvm *kvm);
+int hv_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, gfn_t nr_pages);
+int hv_flush_remote_tlbs(struct kvm *kvm);
void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp);
#else /* !CONFIG_HYPERV */
static inline void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp)
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 168c46f..92d5a19 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -113,6 +113,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
u64 fault_address, char *insn, int insn_len);
+void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *mmu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
@@ -132,7 +134,7 @@ static inline unsigned long kvm_get_pcid(struct kvm_vcpu *vcpu, gpa_t cr3)
{
BUILD_BUG_ON((X86_CR3_PCID_MASK & PAGE_MASK) != 0);
- return kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)
+ return kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE)
? cr3 & X86_CR3_PCID_MASK
: 0;
}
@@ -153,6 +155,24 @@ static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
vcpu->arch.mmu->root_role.level);
}
+static inline void kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *mmu)
+{
+ /*
+ * When EPT is enabled, KVM may passthrough CR0.WP to the guest, i.e.
+ * @mmu's snapshot of CR0.WP and thus all related paging metadata may
+ * be stale. Refresh CR0.WP and the metadata on-demand when checking
+ * for permission faults. Exempt nested MMUs, i.e. MMUs for shadowing
+ * nEPT and nNPT, as CR0.WP is ignored in both cases. Note, KVM does
+ * need to refresh nested_mmu, a.k.a. the walker used to translate L2
+ * GVAs to GPAs, as that "MMU" needs to honor L2's CR0.WP.
+ */
+ if (!tdp_enabled || mmu == &vcpu->arch.guest_mmu)
+ return;
+
+ __kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
+}
+
/*
* Check if a given access (described through the I/D, W/R and U/S bits of a
* page fault error code pfec) causes a permission fault with the given PTE
@@ -184,8 +204,12 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
u64 implicit_access = access & PFERR_IMPLICIT_ACCESS;
bool not_smap = ((rflags & X86_EFLAGS_AC) | implicit_access) == X86_EFLAGS_AC;
int index = (pfec + (not_smap << PFERR_RSVD_BIT)) >> 1;
- bool fault = (mmu->permissions[index] >> pte_access) & 1;
u32 errcode = PFERR_PRESENT_MASK;
+ bool fault;
+
+ kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
+
+ fault = (mmu->permissions[index] >> pte_access) & 1;
WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
if (unlikely(mmu->pkru_mask)) {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 144c5a0..c8961f4 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -125,17 +125,31 @@ module_param(dbg, bool, 0644);
#define PTE_LIST_EXT 14
/*
- * Slight optimization of cacheline layout, by putting `more' and `spte_count'
- * at the start; then accessing it will only use one single cacheline for
- * either full (entries==PTE_LIST_EXT) case or entries<=6.
+ * struct pte_list_desc is the core data structure used to implement a custom
+ * list for tracking a set of related SPTEs, e.g. all the SPTEs that map a
+ * given GFN when used in the context of rmaps. Using a custom list allows KVM
+ * to optimize for the common case where many GFNs will have at most a handful
+ * of SPTEs pointing at them, i.e. allows packing multiple SPTEs into a small
+ * memory footprint, which in turn improves runtime performance by exploiting
+ * cache locality.
+ *
+ * A list is comprised of one or more pte_list_desc objects (descriptors).
+ * Each individual descriptor stores up to PTE_LIST_EXT SPTEs. If a descriptor
+ * is full and a new SPTE needs to be added, a new descriptor is allocated and
+ * becomes the head of the list. This means that, by definition, all tail
+ * descriptors are full.
+ *
+ * Note, the metadata fields are deliberately placed at the start of the
+ * structure to optimize the cacheline layout; accessing the descriptor will
+ * touch only a single cacheline so long as @spte_count <= 6 (or if only the
+ * descriptor's metadata is accessed).
*/
struct pte_list_desc {
struct pte_list_desc *more;
- /*
- * Stores number of entries stored in the pte_list_desc. No need to be
- * u64 but just for easier alignment. When PTE_LIST_EXT, means full.
- */
- u64 spte_count;
+ /* The number of PTEs stored in _this_ descriptor. */
+ u32 spte_count;
+ /* The number of PTEs stored in all tails of this descriptor. */
+ u32 tail_count;
u64 *sptes[PTE_LIST_EXT];
};
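
Illustrative only: with the layout above, the total number of SPTEs on an rmap list can be derived from the head descriptor alone, because tail descriptors are always kept full; this is exactly what the reworked pte_list_count() later in this file relies on. A minimal sketch of the counting rule:

	/* Total SPTEs on the list = entries in the head + entries in all tails. */
	static unsigned int example_pte_list_count(const struct pte_list_desc *head)
	{
		return head->spte_count + head->tail_count;
	}
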
@@ -242,34 +256,37 @@ static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu)
return regs;
}
-static inline bool kvm_available_flush_tlb_with_range(void)
+static unsigned long get_guest_cr3(struct kvm_vcpu *vcpu)
{
- return kvm_x86_ops.tlb_remote_flush_with_range;
+ return kvm_read_cr3(vcpu);
}
-static void kvm_flush_remote_tlbs_with_range(struct kvm *kvm,
- struct kvm_tlb_range *range)
+static inline unsigned long kvm_mmu_get_guest_pgd(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *mmu)
{
- int ret = -ENOTSUPP;
+ if (IS_ENABLED(CONFIG_RETPOLINE) && mmu->get_guest_pgd == get_guest_cr3)
+ return kvm_read_cr3(vcpu);
- if (range && kvm_x86_ops.tlb_remote_flush_with_range)
- ret = static_call(kvm_x86_tlb_remote_flush_with_range)(kvm, range);
+ return mmu->get_guest_pgd(vcpu);
+}
+static inline bool kvm_available_flush_remote_tlbs_range(void)
+{
+ return kvm_x86_ops.flush_remote_tlbs_range;
+}
+
+void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn,
+ gfn_t nr_pages)
+{
+ int ret = -EOPNOTSUPP;
+
+ if (kvm_x86_ops.flush_remote_tlbs_range)
+ ret = static_call(kvm_x86_flush_remote_tlbs_range)(kvm, start_gfn,
+ nr_pages);
if (ret)
kvm_flush_remote_tlbs(kvm);
}
-void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
- u64 start_gfn, u64 pages)
-{
- struct kvm_tlb_range range;
-
- range.start_gfn = start_gfn;
- range.pages = pages;
-
- kvm_flush_remote_tlbs_with_range(kvm, &range);
-}
-
static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index);
/* Flush the range of guest memory mapped by the given SPTE. */
@@ -888,9 +905,9 @@ static void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
untrack_possible_nx_huge_page(kvm, sp);
}
-static struct kvm_memory_slot *
-gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
- bool no_dirty_log)
+static struct kvm_memory_slot *gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu,
+ gfn_t gfn,
+ bool no_dirty_log)
{
struct kvm_memory_slot *slot;
@@ -929,53 +946,69 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
desc->sptes[0] = (u64 *)rmap_head->val;
desc->sptes[1] = spte;
desc->spte_count = 2;
+ desc->tail_count = 0;
rmap_head->val = (unsigned long)desc | 1;
++count;
} else {
rmap_printk("%p %llx many->many\n", spte, *spte);
desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
- while (desc->spte_count == PTE_LIST_EXT) {
- count += PTE_LIST_EXT;
- if (!desc->more) {
- desc->more = kvm_mmu_memory_cache_alloc(cache);
- desc = desc->more;
- desc->spte_count = 0;
- break;
- }
- desc = desc->more;
+ count = desc->tail_count + desc->spte_count;
+
+ /*
+ * If the previous head is full, allocate a new head descriptor
+ * as tail descriptors are always kept full.
+ */
+ if (desc->spte_count == PTE_LIST_EXT) {
+ desc = kvm_mmu_memory_cache_alloc(cache);
+ desc->more = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+ desc->spte_count = 0;
+ desc->tail_count = count;
+ rmap_head->val = (unsigned long)desc | 1;
}
- count += desc->spte_count;
desc->sptes[desc->spte_count++] = spte;
}
return count;
}
-static void
-pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
- struct pte_list_desc *desc, int i,
- struct pte_list_desc *prev_desc)
+static void pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
+ struct pte_list_desc *desc, int i)
{
- int j = desc->spte_count - 1;
+ struct pte_list_desc *head_desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+ int j = head_desc->spte_count - 1;
- desc->sptes[i] = desc->sptes[j];
- desc->sptes[j] = NULL;
- desc->spte_count--;
- if (desc->spte_count)
+ /*
+ * The head descriptor should never be empty. A new head is added only
+ * when adding an entry and the previous head is full, and heads are
+ * removed (this flow) when they become empty.
+ */
+ BUG_ON(j < 0);
+
+ /*
+ * Replace the to-be-freed SPTE with the last valid entry from the head
+ * descriptor to ensure that tail descriptors are full at all times.
+ * Note, this also means that tail_count is stable for each descriptor.
+ */
+ desc->sptes[i] = head_desc->sptes[j];
+ head_desc->sptes[j] = NULL;
+ head_desc->spte_count--;
+ if (head_desc->spte_count)
return;
- if (!prev_desc && !desc->more)
+
+ /*
+ * The head descriptor is empty. If there are no tail descriptors,
+	 * nullify the rmap head to mark the list as empty, else point the rmap
+ * head at the next descriptor, i.e. the new head.
+ */
+ if (!head_desc->more)
rmap_head->val = 0;
else
- if (prev_desc)
- prev_desc->more = desc->more;
- else
- rmap_head->val = (unsigned long)desc->more | 1;
- mmu_free_pte_list_desc(desc);
+ rmap_head->val = (unsigned long)head_desc->more | 1;
+ mmu_free_pte_list_desc(head_desc);
}
static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
{
struct pte_list_desc *desc;
- struct pte_list_desc *prev_desc;
int i;
if (!rmap_head->val) {
@@ -991,16 +1024,13 @@ static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
} else {
rmap_printk("%p many->many\n", spte);
desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
- prev_desc = NULL;
while (desc) {
for (i = 0; i < desc->spte_count; ++i) {
if (desc->sptes[i] == spte) {
- pte_list_desc_remove_entry(rmap_head,
- desc, i, prev_desc);
+ pte_list_desc_remove_entry(rmap_head, desc, i);
return;
}
}
- prev_desc = desc;
desc = desc->more;
}
pr_err("%s: %p many->many\n", __func__, spte);
@@ -1047,7 +1077,6 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm,
unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
{
struct pte_list_desc *desc;
- unsigned int count = 0;
if (!rmap_head->val)
return 0;
@@ -1055,13 +1084,7 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
return 1;
desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
-
- while (desc) {
- count += desc->spte_count;
- desc = desc->more;
- }
-
- return count;
+ return desc->tail_count + desc->spte_count;
}
static struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level,
@@ -1073,14 +1096,6 @@ static struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level,
return &slot->arch.rmap[level - PG_LEVEL_4K][idx];
}
-static bool rmap_can_add(struct kvm_vcpu *vcpu)
-{
- struct kvm_mmu_memory_cache *mc;
-
- mc = &vcpu->arch.mmu_pte_list_desc_cache;
- return kvm_mmu_memory_cache_nr_free_objects(mc);
-}
-
static void rmap_remove(struct kvm *kvm, u64 *spte)
{
struct kvm_memslots *slots;
@@ -1479,7 +1494,7 @@ static bool kvm_set_pte_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
}
}
- if (need_flush && kvm_available_flush_tlb_with_range()) {
+ if (need_flush && kvm_available_flush_remote_tlbs_range()) {
kvm_flush_remote_tlbs_gfn(kvm, gfn, level);
return false;
}
@@ -1504,8 +1519,8 @@ struct slot_rmap_walk_iterator {
struct kvm_rmap_head *end_rmap;
};
-static void
-rmap_walk_init_level(struct slot_rmap_walk_iterator *iterator, int level)
+static void rmap_walk_init_level(struct slot_rmap_walk_iterator *iterator,
+ int level)
{
iterator->level = level;
iterator->gfn = iterator->start_gfn;
@@ -1513,10 +1528,10 @@ rmap_walk_init_level(struct slot_rmap_walk_iterator *iterator, int level)
iterator->end_rmap = gfn_to_rmap(iterator->end_gfn, level, iterator->slot);
}
-static void
-slot_rmap_walk_init(struct slot_rmap_walk_iterator *iterator,
- const struct kvm_memory_slot *slot, int start_level,
- int end_level, gfn_t start_gfn, gfn_t end_gfn)
+static void slot_rmap_walk_init(struct slot_rmap_walk_iterator *iterator,
+ const struct kvm_memory_slot *slot,
+ int start_level, int end_level,
+ gfn_t start_gfn, gfn_t end_gfn)
{
iterator->slot = slot;
iterator->start_level = start_level;
@@ -1789,12 +1804,6 @@ static void mark_unsync(u64 *spte)
kvm_mmu_mark_parents_unsync(sp);
}
-static int nonpaging_sync_page(struct kvm_vcpu *vcpu,
- struct kvm_mmu_page *sp)
-{
- return -1;
-}
-
#define KVM_PAGE_ARRAY_NR 16
struct kvm_mmu_pages {
@@ -1914,10 +1923,79 @@ static bool sp_has_gptes(struct kvm_mmu_page *sp)
&(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)]) \
if ((_sp)->gfn != (_gfn) || !sp_has_gptes(_sp)) {} else
+static bool kvm_sync_page_check(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+ union kvm_mmu_page_role root_role = vcpu->arch.mmu->root_role;
+
+ /*
+ * Ignore various flags when verifying that it's safe to sync a shadow
+ * page using the current MMU context.
+ *
+ * - level: not part of the overall MMU role and will never match as the MMU's
+ * level tracks the root level
+ * - access: updated based on the new guest PTE
+ * - quadrant: not part of the overall MMU role (similar to level)
+ */
+ const union kvm_mmu_page_role sync_role_ign = {
+ .level = 0xf,
+ .access = 0x7,
+ .quadrant = 0x3,
+ .passthrough = 0x1,
+ };
+
+ /*
+ * Direct pages can never be unsync, and KVM should never attempt to
+ * sync a shadow page for a different MMU context, e.g. if the role
+ * differs then the memslot lookup (SMM vs. non-SMM) will be bogus, the
+ * reserved bits checks will be wrong, etc...
+ */
+ if (WARN_ON_ONCE(sp->role.direct || !vcpu->arch.mmu->sync_spte ||
+ (sp->role.word ^ root_role.word) & ~sync_role_ign.word))
+ return false;
+
+ return true;
+}
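The role comparison above masks off "don't care" fields by building a second instance of the same union with those bitfields set to all-ones. A minimal userspace sketch of the trick, using a made-up role layout rather than KVM's kvm_mmu_page_role:

#include <stdint.h>
#include <stdio.h>

union page_role {
	uint32_t word;
	struct {
		uint32_t level:4;
		uint32_t access:3;
		uint32_t quadrant:2;
		uint32_t direct:1;
		uint32_t smm:1;
	};
};

static int roles_match_ignoring(union page_role a, union page_role b)
{
	/* All-ones in the fields to ignore, zero everywhere else. */
	const union page_role ign = {
		.level = 0xf,
		.access = 0x7,
		.quadrant = 0x3,
	};

	return !((a.word ^ b.word) & ~ign.word);
}

int main(void)
{
	union page_role a = { .level = 4, .access = 3, .smm = 1 };
	union page_role b = { .level = 2, .access = 7, .smm = 1 };
	union page_role c = { .level = 4, .access = 3, .smm = 0 };

	/* 1: only ignored fields differ;  0: smm differs */
	printf("%d %d\n", roles_match_ignoring(a, b), roles_match_ignoring(a, c));
	return 0;
}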
+
+static int kvm_sync_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int i)
+{
+ if (!sp->spt[i])
+ return 0;
+
+ return vcpu->arch.mmu->sync_spte(vcpu, sp, i);
+}
+
+static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+ int flush = 0;
+ int i;
+
+ if (!kvm_sync_page_check(vcpu, sp))
+ return -1;
+
+ for (i = 0; i < SPTE_ENT_PER_PAGE; i++) {
+ int ret = kvm_sync_spte(vcpu, sp, i);
+
+ if (ret < -1)
+ return -1;
+ flush |= ret;
+ }
+
+ /*
+ * Note, any flush is purely for KVM's correctness, e.g. when dropping
+ * an existing SPTE or clearing W/A/D bits to ensure an mmu_notifier
+ * unmap or dirty logging event doesn't fail to flush. The guest is
+ * responsible for flushing the TLB to ensure any changes in protection
+ * bits are recognized, i.e. until the guest flushes or page faults on
+ * a relevant address, KVM is architecturally allowed to let vCPUs use
+ * cached translations with the old protection bits.
+ */
+ return flush;
+}
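The per-entry return contract (< 0 failure, 0 synced without flush, > 0 synced with flush) and the way the page-level wrapper folds the results together can be sketched in isolation; sync_one() below is a made-up stand-in for the real sync_spte hook.

#include <stdio.h>

#define NR_ENTRIES 8

/* Stand-in for the per-SPTE sync hook: <0 fail, 0 no flush, >0 flush. */
static int sync_one(int i)
{
	return i == 6 ? 1 : 0;	/* pretend entry 6 needed a TLB flush */
}

static int sync_page(void)
{
	int flush = 0;
	int i;

	for (i = 0; i < NR_ENTRIES; i++) {
		int ret = sync_one(i);

		if (ret < 0)
			return -1;	/* caller zaps the whole page */
		flush |= ret;
	}
	return flush;
}

int main(void)
{
	printf("sync_page() = %d\n", sync_page());	/* 1 -> flush needed */
	return 0;
}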
+
static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
struct list_head *invalid_list)
{
- int ret = vcpu->arch.mmu->sync_page(vcpu, sp);
+ int ret = __kvm_sync_page(vcpu, sp);
if (ret < 0)
kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
@@ -3304,9 +3382,9 @@ static bool page_fault_can_be_fast(struct kvm_page_fault *fault)
* Returns true if the SPTE was fixed successfully. Otherwise,
* someone else modified the SPTE from its original value.
*/
-static bool
-fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
- u64 *sptep, u64 old_spte, u64 new_spte)
+static bool fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu,
+ struct kvm_page_fault *fault,
+ u64 *sptep, u64 old_spte, u64 new_spte)
{
/*
* Theoretically we could also set dirty bit (and flush TLB) here in
@@ -3513,6 +3591,8 @@ void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
LIST_HEAD(invalid_list);
bool free_active_root;
+ WARN_ON_ONCE(roots_to_free & ~KVM_MMU_ROOTS_ALL);
+
BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG);
/* Before acquiring the MMU lock, see if we need to do any real work. */
@@ -3731,7 +3811,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
int quadrant, i, r;
hpa_t root;
- root_pgd = mmu->get_guest_pgd(vcpu);
+ root_pgd = kvm_mmu_get_guest_pgd(vcpu, mmu);
root_gfn = root_pgd >> PAGE_SHIFT;
if (mmu_check_root(vcpu, root_gfn))
@@ -4181,7 +4261,7 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
arch.token = alloc_apf_token(vcpu);
arch.gfn = gfn;
arch.direct_map = vcpu->arch.mmu->root_role.direct;
- arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu);
+ arch.cr3 = kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu);
return kvm_setup_async_pf(vcpu, cr2_or_gpa,
kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
@@ -4200,7 +4280,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
return;
if (!vcpu->arch.mmu->root_role.direct &&
- work->arch.cr3 != vcpu->arch.mmu->get_guest_pgd(vcpu))
+ work->arch.cr3 != kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu))
return;
kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true, NULL);
@@ -4469,8 +4549,7 @@ static void nonpaging_init_context(struct kvm_mmu *context)
{
context->page_fault = nonpaging_page_fault;
context->gva_to_gpa = nonpaging_gva_to_gpa;
- context->sync_page = nonpaging_sync_page;
- context->invlpg = NULL;
+ context->sync_spte = NULL;
}
static inline bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t pgd,
@@ -4604,11 +4683,6 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd)
}
EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd);
-static unsigned long get_cr3(struct kvm_vcpu *vcpu)
-{
- return kvm_read_cr3(vcpu);
-}
-
static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
unsigned int access)
{
@@ -4638,10 +4712,9 @@ static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
#include "paging_tmpl.h"
#undef PTTYPE
-static void
-__reset_rsvds_bits_mask(struct rsvd_bits_validate *rsvd_check,
- u64 pa_bits_rsvd, int level, bool nx, bool gbpages,
- bool pse, bool amd)
+static void __reset_rsvds_bits_mask(struct rsvd_bits_validate *rsvd_check,
+ u64 pa_bits_rsvd, int level, bool nx,
+ bool gbpages, bool pse, bool amd)
{
u64 gbpages_bit_rsvd = 0;
u64 nonleaf_bit8_rsvd = 0;
@@ -4754,9 +4827,9 @@ static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu,
guest_cpuid_is_amd_or_hygon(vcpu));
}
-static void
-__reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
- u64 pa_bits_rsvd, bool execonly, int huge_page_level)
+static void __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
+ u64 pa_bits_rsvd, bool execonly,
+ int huge_page_level)
{
u64 high_bits_rsvd = pa_bits_rsvd & rsvd_bits(0, 51);
u64 large_1g_rsvd = 0, large_2m_rsvd = 0;
@@ -4856,8 +4929,7 @@ static inline bool boot_cpu_is_amd(void)
 * the direct page table on host, use as many mmu features as
* possible, however, kvm currently does not do execution-protection.
*/
-static void
-reset_tdp_shadow_zero_bits_mask(struct kvm_mmu *context)
+static void reset_tdp_shadow_zero_bits_mask(struct kvm_mmu *context)
{
struct rsvd_bits_validate *shadow_zero_check;
int i;
@@ -5060,20 +5132,18 @@ static void paging64_init_context(struct kvm_mmu *context)
{
context->page_fault = paging64_page_fault;
context->gva_to_gpa = paging64_gva_to_gpa;
- context->sync_page = paging64_sync_page;
- context->invlpg = paging64_invlpg;
+ context->sync_spte = paging64_sync_spte;
}
static void paging32_init_context(struct kvm_mmu *context)
{
context->page_fault = paging32_page_fault;
context->gva_to_gpa = paging32_gva_to_gpa;
- context->sync_page = paging32_sync_page;
- context->invlpg = paging32_invlpg;
+ context->sync_spte = paging32_sync_spte;
}
-static union kvm_cpu_role
-kvm_calc_cpu_role(struct kvm_vcpu *vcpu, const struct kvm_mmu_role_regs *regs)
+static union kvm_cpu_role kvm_calc_cpu_role(struct kvm_vcpu *vcpu,
+ const struct kvm_mmu_role_regs *regs)
{
union kvm_cpu_role role = {0};
@@ -5112,6 +5182,21 @@ kvm_calc_cpu_role(struct kvm_vcpu *vcpu, const struct kvm_mmu_role_regs *regs)
return role;
}
+void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *mmu)
+{
+ const bool cr0_wp = kvm_is_cr0_bit_set(vcpu, X86_CR0_WP);
+
+ BUILD_BUG_ON((KVM_MMU_CR0_ROLE_BITS & KVM_POSSIBLE_CR0_GUEST_BITS) != X86_CR0_WP);
+ BUILD_BUG_ON((KVM_MMU_CR4_ROLE_BITS & KVM_POSSIBLE_CR4_GUEST_BITS));
+
+ if (is_cr0_wp(mmu) == cr0_wp)
+ return;
+
+ mmu->cpu_role.base.cr0_wp = cr0_wp;
+ reset_guest_paging_metadata(vcpu, mmu);
+}
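A minimal userspace sketch of the refresh-on-change pattern used above, assuming a toy mmu struct in place of KVM's: the cached role records the guest-owned CR0.WP value, and the expensive metadata recomputation runs only when the freshly read value actually differs.

#include <stdbool.h>
#include <stdio.h>

struct toy_mmu {
	bool cached_cr0_wp;	/* stands in for cpu_role.base.cr0_wp */
	int metadata_resets;	/* stands in for reset_guest_paging_metadata() */
};

static void refresh_cr0_wp(struct toy_mmu *mmu, bool guest_cr0_wp)
{
	if (mmu->cached_cr0_wp == guest_cr0_wp)
		return;		/* unchanged: skip the expensive recompute */

	mmu->cached_cr0_wp = guest_cr0_wp;
	mmu->metadata_resets++;
}

int main(void)
{
	struct toy_mmu mmu = { .cached_cr0_wp = true };

	refresh_cr0_wp(&mmu, true);	/* no-op */
	refresh_cr0_wp(&mmu, false);	/* one reset */
	refresh_cr0_wp(&mmu, false);	/* no-op */
	printf("resets = %d\n", mmu.metadata_resets);	/* 1 */
	return 0;
}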
+
static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
{
/* tdp_root_level is architecture forced level, use it if nonzero */
@@ -5157,9 +5242,8 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu,
context->cpu_role.as_u64 = cpu_role.as_u64;
context->root_role.word = root_role.word;
context->page_fault = kvm_tdp_page_fault;
- context->sync_page = nonpaging_sync_page;
- context->invlpg = NULL;
- context->get_guest_pgd = get_cr3;
+ context->sync_spte = NULL;
+ context->get_guest_pgd = get_guest_cr3;
context->get_pdptr = kvm_pdptr_read;
context->inject_page_fault = kvm_inject_page_fault;
@@ -5289,8 +5373,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
context->page_fault = ept_page_fault;
context->gva_to_gpa = ept_gva_to_gpa;
- context->sync_page = ept_sync_page;
- context->invlpg = ept_invlpg;
+ context->sync_spte = ept_sync_spte;
update_permission_bitmask(context, true);
context->pkru_mask = 0;
@@ -5309,7 +5392,7 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu,
kvm_init_shadow_mmu(vcpu, cpu_role);
- context->get_guest_pgd = get_cr3;
+ context->get_guest_pgd = get_guest_cr3;
context->get_pdptr = kvm_pdptr_read;
context->inject_page_fault = kvm_inject_page_fault;
}
@@ -5323,7 +5406,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu,
return;
g_context->cpu_role.as_u64 = new_mode.as_u64;
- g_context->get_guest_pgd = get_cr3;
+ g_context->get_guest_pgd = get_guest_cr3;
g_context->get_pdptr = kvm_pdptr_read;
g_context->inject_page_fault = kvm_inject_page_fault;
@@ -5331,7 +5414,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu,
* L2 page tables are never shadowed, so there is no need to sync
* SPTEs.
*/
- g_context->invlpg = NULL;
+ g_context->sync_spte = NULL;
/*
* Note that arch.mmu->gva_to_gpa translates l2_gpa to l1_gpa using
@@ -5393,7 +5476,7 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
* Changing guest CPUID after KVM_RUN is forbidden, see the comment in
* kvm_arch_vcpu_ioctl().
*/
- KVM_BUG_ON(vcpu->arch.last_vmentry_cpu != -1, vcpu->kvm);
+ KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm);
}
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
@@ -5707,48 +5790,77 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
}
EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
-void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
- gva_t gva, hpa_t root_hpa)
+static void __kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ u64 addr, hpa_t root_hpa)
+{
+ struct kvm_shadow_walk_iterator iterator;
+
+ vcpu_clear_mmio_info(vcpu, addr);
+
+ if (!VALID_PAGE(root_hpa))
+ return;
+
+ write_lock(&vcpu->kvm->mmu_lock);
+ for_each_shadow_entry_using_root(vcpu, root_hpa, addr, iterator) {
+ struct kvm_mmu_page *sp = sptep_to_sp(iterator.sptep);
+
+ if (sp->unsync) {
+ int ret = kvm_sync_spte(vcpu, sp, iterator.index);
+
+ if (ret < 0)
+ mmu_page_zap_pte(vcpu->kvm, sp, iterator.sptep, NULL);
+ if (ret)
+ kvm_flush_remote_tlbs_sptep(vcpu->kvm, iterator.sptep);
+ }
+
+ if (!sp->unsync_children)
+ break;
+ }
+ write_unlock(&vcpu->kvm->mmu_lock);
+}
+
+void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ u64 addr, unsigned long roots)
{
int i;
+ WARN_ON_ONCE(roots & ~KVM_MMU_ROOTS_ALL);
+
/* It's actually a GPA for vcpu->arch.guest_mmu. */
if (mmu != &vcpu->arch.guest_mmu) {
/* INVLPG on a non-canonical address is a NOP according to the SDM. */
- if (is_noncanonical_address(gva, vcpu))
+ if (is_noncanonical_address(addr, vcpu))
return;
- static_call(kvm_x86_flush_tlb_gva)(vcpu, gva);
+ static_call(kvm_x86_flush_tlb_gva)(vcpu, addr);
}
- if (!mmu->invlpg)
+ if (!mmu->sync_spte)
return;
- if (root_hpa == INVALID_PAGE) {
- mmu->invlpg(vcpu, gva, mmu->root.hpa);
+ if (roots & KVM_MMU_ROOT_CURRENT)
+ __kvm_mmu_invalidate_addr(vcpu, mmu, addr, mmu->root.hpa);
- /*
- * INVLPG is required to invalidate any global mappings for the VA,
- * irrespective of PCID. Since it would take us roughly similar amount
- * of work to determine whether any of the prev_root mappings of the VA
- * is marked global, or to just sync it blindly, so we might as well
- * just always sync it.
- *
- * Mappings not reachable via the current cr3 or the prev_roots will be
- * synced when switching to that cr3, so nothing needs to be done here
- * for them.
- */
- for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
- if (VALID_PAGE(mmu->prev_roots[i].hpa))
- mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
- } else {
- mmu->invlpg(vcpu, gva, root_hpa);
+ for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+ if (roots & KVM_MMU_ROOT_PREVIOUS(i))
+ __kvm_mmu_invalidate_addr(vcpu, mmu, addr, mmu->prev_roots[i].hpa);
}
}
+EXPORT_SYMBOL_GPL(kvm_mmu_invalidate_addr);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
- kvm_mmu_invalidate_gva(vcpu, vcpu->arch.walk_mmu, gva, INVALID_PAGE);
+ /*
+ * INVLPG is required to invalidate any global mappings for the VA,
+ * irrespective of PCID. Blindly sync all roots as it would take
+ * roughly the same amount of work/time to determine whether any of the
+ * previous roots have a global mapping.
+ *
+ * Mappings not reachable via the current or previous cached roots will
+ * be synced when switching to that new cr3, so nothing needs to be
+ * done here for them.
+ */
+ kvm_mmu_invalidate_addr(vcpu, vcpu->arch.walk_mmu, gva, KVM_MMU_ROOTS_ALL);
++vcpu->stat.invlpg;
}
EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
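Both callers now describe which roots to sync with a bitmask instead of a raw root pointer. A minimal userspace sketch of that interface, with hypothetical bit definitions standing in for KVM_MMU_ROOT_CURRENT / KVM_MMU_ROOT_PREVIOUS():

#include <stdio.h>

#define NUM_PREV_ROOTS	3
#define ROOT_CURRENT	(1UL << 0)
#define ROOT_PREVIOUS(i)	(1UL << (1 + (i)))
#define ROOTS_ALL	(ROOT_CURRENT | (((1UL << NUM_PREV_ROOTS) - 1) << 1))

static void invalidate_one_root(const char *name, unsigned long addr)
{
	printf("sync %#lx in %s root\n", addr, name);
}

static void invalidate_addr(unsigned long addr, unsigned long roots)
{
	int i;

	if (roots & ROOT_CURRENT)
		invalidate_one_root("current", addr);

	for (i = 0; i < NUM_PREV_ROOTS; i++)
		if (roots & ROOT_PREVIOUS(i))
			invalidate_one_root("previous", addr);
}

int main(void)
{
	/* INVLPG-like case: sync the address in every cached root. */
	invalidate_addr(0x1000, ROOTS_ALL);

	/* INVPCID-like case: only the roots whose PCID matched. */
	invalidate_addr(0x2000, ROOT_CURRENT | ROOT_PREVIOUS(1));
	return 0;
}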
@@ -5757,27 +5869,20 @@ EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
{
struct kvm_mmu *mmu = vcpu->arch.mmu;
- bool tlb_flush = false;
+ unsigned long roots = 0;
uint i;
- if (pcid == kvm_get_active_pcid(vcpu)) {
- if (mmu->invlpg)
- mmu->invlpg(vcpu, gva, mmu->root.hpa);
- tlb_flush = true;
- }
+ if (pcid == kvm_get_active_pcid(vcpu))
+ roots |= KVM_MMU_ROOT_CURRENT;
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
if (VALID_PAGE(mmu->prev_roots[i].hpa) &&
- pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd)) {
- if (mmu->invlpg)
- mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
- tlb_flush = true;
- }
+ pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd))
+ roots |= KVM_MMU_ROOT_PREVIOUS(i);
}
- if (tlb_flush)
- static_call(kvm_x86_flush_tlb_gva)(vcpu, gva);
-
+ if (roots)
+ kvm_mmu_invalidate_addr(vcpu, mmu, gva, roots);
++vcpu->stat.invlpg;
/*
@@ -5814,29 +5919,30 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
EXPORT_SYMBOL_GPL(kvm_configure_mmu);
/* The return value indicates if tlb flush on all vcpus is needed. */
-typedef bool (*slot_level_handler) (struct kvm *kvm,
+typedef bool (*slot_rmaps_handler) (struct kvm *kvm,
struct kvm_rmap_head *rmap_head,
const struct kvm_memory_slot *slot);
-/* The caller should hold mmu-lock before calling this function. */
-static __always_inline bool
-slot_handle_level_range(struct kvm *kvm, const struct kvm_memory_slot *memslot,
- slot_level_handler fn, int start_level, int end_level,
- gfn_t start_gfn, gfn_t end_gfn, bool flush_on_yield,
- bool flush)
+static __always_inline bool __walk_slot_rmaps(struct kvm *kvm,
+ const struct kvm_memory_slot *slot,
+ slot_rmaps_handler fn,
+ int start_level, int end_level,
+ gfn_t start_gfn, gfn_t end_gfn,
+ bool flush_on_yield, bool flush)
{
struct slot_rmap_walk_iterator iterator;
- for_each_slot_rmap_range(memslot, start_level, end_level, start_gfn,
+ lockdep_assert_held_write(&kvm->mmu_lock);
+
+ for_each_slot_rmap_range(slot, start_level, end_level, start_gfn,
end_gfn, &iterator) {
if (iterator.rmap)
- flush |= fn(kvm, iterator.rmap, memslot);
+ flush |= fn(kvm, iterator.rmap, slot);
if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
if (flush && flush_on_yield) {
- kvm_flush_remote_tlbs_with_address(kvm,
- start_gfn,
- iterator.gfn - start_gfn + 1);
+ kvm_flush_remote_tlbs_range(kvm, start_gfn,
+ iterator.gfn - start_gfn + 1);
flush = false;
}
cond_resched_rwlock_write(&kvm->mmu_lock);
@@ -5846,23 +5952,23 @@ slot_handle_level_range(struct kvm *kvm, const struct kvm_memory_slot *memslot,
return flush;
}
-static __always_inline bool
-slot_handle_level(struct kvm *kvm, const struct kvm_memory_slot *memslot,
- slot_level_handler fn, int start_level, int end_level,
- bool flush_on_yield)
+static __always_inline bool walk_slot_rmaps(struct kvm *kvm,
+ const struct kvm_memory_slot *slot,
+ slot_rmaps_handler fn,
+ int start_level, int end_level,
+ bool flush_on_yield)
{
- return slot_handle_level_range(kvm, memslot, fn, start_level,
- end_level, memslot->base_gfn,
- memslot->base_gfn + memslot->npages - 1,
- flush_on_yield, false);
+ return __walk_slot_rmaps(kvm, slot, fn, start_level, end_level,
+ slot->base_gfn, slot->base_gfn + slot->npages - 1,
+ flush_on_yield, false);
}
-static __always_inline bool
-slot_handle_level_4k(struct kvm *kvm, const struct kvm_memory_slot *memslot,
- slot_level_handler fn, bool flush_on_yield)
+static __always_inline bool walk_slot_rmaps_4k(struct kvm *kvm,
+ const struct kvm_memory_slot *slot,
+ slot_rmaps_handler fn,
+ bool flush_on_yield)
{
- return slot_handle_level(kvm, memslot, fn, PG_LEVEL_4K,
- PG_LEVEL_4K, flush_on_yield);
+ return walk_slot_rmaps(kvm, slot, fn, PG_LEVEL_4K, PG_LEVEL_4K, flush_on_yield);
}
static void free_mmu_pages(struct kvm_mmu *mmu)
@@ -6157,9 +6263,9 @@ static bool kvm_rmap_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_e
if (WARN_ON_ONCE(start >= end))
continue;
- flush = slot_handle_level_range(kvm, memslot, __kvm_zap_rmap,
- PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
- start, end - 1, true, flush);
+ flush = __walk_slot_rmaps(kvm, memslot, __kvm_zap_rmap,
+ PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
+ start, end - 1, true, flush);
}
}
@@ -6191,8 +6297,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
}
if (flush)
- kvm_flush_remote_tlbs_with_address(kvm, gfn_start,
- gfn_end - gfn_start);
+ kvm_flush_remote_tlbs_range(kvm, gfn_start, gfn_end - gfn_start);
kvm_mmu_invalidate_end(kvm, 0, -1ul);
@@ -6212,8 +6317,8 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
{
if (kvm_memslots_have_rmaps(kvm)) {
write_lock(&kvm->mmu_lock);
- slot_handle_level(kvm, memslot, slot_rmap_write_protect,
- start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
+ walk_slot_rmaps(kvm, memslot, slot_rmap_write_protect,
+ start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
write_unlock(&kvm->mmu_lock);
}
@@ -6448,10 +6553,9 @@ static void kvm_shadow_mmu_try_split_huge_pages(struct kvm *kvm,
* all the way to the target level. There's no need to split pages
* already at the target level.
*/
- for (level = KVM_MAX_HUGEPAGE_LEVEL; level > target_level; level--) {
- slot_handle_level_range(kvm, slot, shadow_mmu_try_split_huge_pages,
- level, level, start, end - 1, true, false);
- }
+ for (level = KVM_MAX_HUGEPAGE_LEVEL; level > target_level; level--)
+ __walk_slot_rmaps(kvm, slot, shadow_mmu_try_split_huge_pages,
+ level, level, start, end - 1, true, false);
}
/* Must be called with the mmu_lock held in write-mode. */
@@ -6530,7 +6634,7 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
PG_LEVEL_NUM)) {
kvm_zap_one_rmap_spte(kvm, rmap_head, sptep);
- if (kvm_available_flush_tlb_with_range())
+ if (kvm_available_flush_remote_tlbs_range())
kvm_flush_remote_tlbs_sptep(kvm, sptep);
else
need_tlb_flush = 1;
@@ -6549,8 +6653,8 @@ static void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
* Note, use KVM_MAX_HUGEPAGE_LEVEL - 1 since there's no need to zap
* pages that are already mapped at the maximum hugepage level.
*/
- if (slot_handle_level(kvm, slot, kvm_mmu_zap_collapsible_spte,
- PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL - 1, true))
+ if (walk_slot_rmaps(kvm, slot, kvm_mmu_zap_collapsible_spte,
+ PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL - 1, true))
kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
}
@@ -6581,8 +6685,7 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
* is observed by any other operation on the same memslot.
*/
lockdep_assert_held(&kvm->slots_lock);
- kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn,
- memslot->npages);
+ kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages);
}
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
@@ -6594,7 +6697,7 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
* Clear dirty bits only on 4k SPTEs since the legacy MMU only
* support dirty logging at a 4k granularity.
*/
- slot_handle_level_4k(kvm, memslot, __rmap_clear_dirty, false);
+ walk_slot_rmaps_4k(kvm, memslot, __rmap_clear_dirty, false);
write_unlock(&kvm->mmu_lock);
}
@@ -6664,8 +6767,8 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
}
}
-static unsigned long
-mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+static unsigned long mmu_shrink_scan(struct shrinker *shrink,
+ struct shrink_control *sc)
{
struct kvm *kvm;
int nr_to_scan = sc->nr_to_scan;
@@ -6723,8 +6826,8 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
return freed;
}
-static unsigned long
-mmu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+static unsigned long mmu_shrink_count(struct shrinker *shrink,
+ struct shrink_control *sc)
{
return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
}
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 2cbb155..d39af56 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -170,14 +170,14 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
struct kvm_memory_slot *slot, u64 gfn,
int min_level);
-void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
- u64 start_gfn, u64 pages);
+void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn,
+ gfn_t nr_pages);
/* Flush the given page (huge or not) of guest memory. */
static inline void kvm_flush_remote_tlbs_gfn(struct kvm *kvm, gfn_t gfn, int level)
{
- kvm_flush_remote_tlbs_with_address(kvm, gfn_round_for_level(gfn, level),
- KVM_PAGES_PER_HPAGE(level));
+ kvm_flush_remote_tlbs_range(kvm, gfn_round_for_level(gfn, level),
+ KVM_PAGES_PER_HPAGE(level));
}
unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index a056f27..0662e02 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -324,7 +324,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
trace_kvm_mmu_pagetable_walk(addr, access);
retry_walk:
walker->level = mmu->cpu_role.base.level;
- pte = mmu->get_guest_pgd(vcpu);
+ pte = kvm_mmu_get_guest_pgd(vcpu, mmu);
have_ad = PT_HAVE_ACCESSED_DIRTY(mmu);
#if PTTYPE == 64
@@ -519,7 +519,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
static bool
FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
- u64 *spte, pt_element_t gpte, bool no_dirty_log)
+ u64 *spte, pt_element_t gpte)
{
struct kvm_memory_slot *slot;
unsigned pte_access;
@@ -535,8 +535,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
pte_access = sp->role.access & FNAME(gpte_access)(gpte);
FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);
- slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn,
- no_dirty_log && (pte_access & ACC_WRITE_MASK));
+ slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, pte_access & ACC_WRITE_MASK);
if (!slot)
return false;
@@ -605,7 +604,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
if (is_shadow_present_pte(*spte))
continue;
- if (!FNAME(prefetch_gpte)(vcpu, sp, spte, gptep[i], true))
+ if (!FNAME(prefetch_gpte)(vcpu, sp, spte, gptep[i]))
break;
}
}
@@ -846,64 +845,6 @@ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
return gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t);
}
-static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
-{
- struct kvm_shadow_walk_iterator iterator;
- struct kvm_mmu_page *sp;
- u64 old_spte;
- int level;
- u64 *sptep;
-
- vcpu_clear_mmio_info(vcpu, gva);
-
- /*
- * No need to check return value here, rmap_can_add() can
- * help us to skip pte prefetch later.
- */
- mmu_topup_memory_caches(vcpu, true);
-
- if (!VALID_PAGE(root_hpa)) {
- WARN_ON(1);
- return;
- }
-
- write_lock(&vcpu->kvm->mmu_lock);
- for_each_shadow_entry_using_root(vcpu, root_hpa, gva, iterator) {
- level = iterator.level;
- sptep = iterator.sptep;
-
- sp = sptep_to_sp(sptep);
- old_spte = *sptep;
- if (is_last_spte(old_spte, level)) {
- pt_element_t gpte;
- gpa_t pte_gpa;
-
- if (!sp->unsync)
- break;
-
- pte_gpa = FNAME(get_level1_sp_gpa)(sp);
- pte_gpa += spte_index(sptep) * sizeof(pt_element_t);
-
- mmu_page_zap_pte(vcpu->kvm, sp, sptep, NULL);
- if (is_shadow_present_pte(old_spte))
- kvm_flush_remote_tlbs_sptep(vcpu->kvm, sptep);
-
- if (!rmap_can_add(vcpu))
- break;
-
- if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte,
- sizeof(pt_element_t)))
- break;
-
- FNAME(prefetch_gpte)(vcpu, sp, sptep, gpte, false);
- }
-
- if (!sp->unsync_children)
- break;
- }
- write_unlock(&vcpu->kvm->mmu_lock);
-}
-
/* Note, @addr is a GPA when gva_to_gpa() translates an L2 GPA to an L1 GPA. */
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
gpa_t addr, u64 access,
@@ -936,114 +877,75 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
* can't change unless all sptes pointing to it are nuked first.
*
* Returns
- * < 0: the sp should be zapped
- * 0: the sp is synced and no tlb flushing is required
- * > 0: the sp is synced and tlb flushing is required
+ * < 0: failed to sync spte
+ * 0: the spte is synced and no tlb flushing is required
+ * > 0: the spte is synced and tlb flushing is required
*/
-static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+static int FNAME(sync_spte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int i)
{
- union kvm_mmu_page_role root_role = vcpu->arch.mmu->root_role;
- int i;
bool host_writable;
gpa_t first_pte_gpa;
- bool flush = false;
+ u64 *sptep, spte;
+ struct kvm_memory_slot *slot;
+ unsigned pte_access;
+ pt_element_t gpte;
+ gpa_t pte_gpa;
+ gfn_t gfn;
- /*
- * Ignore various flags when verifying that it's safe to sync a shadow
- * page using the current MMU context.
- *
- * - level: not part of the overall MMU role and will never match as the MMU's
- * level tracks the root level
- * - access: updated based on the new guest PTE
- * - quadrant: not part of the overall MMU role (similar to level)
- */
- const union kvm_mmu_page_role sync_role_ign = {
- .level = 0xf,
- .access = 0x7,
- .quadrant = 0x3,
- .passthrough = 0x1,
- };
-
- /*
- * Direct pages can never be unsync, and KVM should never attempt to
- * sync a shadow page for a different MMU context, e.g. if the role
- * differs then the memslot lookup (SMM vs. non-SMM) will be bogus, the
- * reserved bits checks will be wrong, etc...
- */
- if (WARN_ON_ONCE(sp->role.direct ||
- (sp->role.word ^ root_role.word) & ~sync_role_ign.word))
- return -1;
+ if (WARN_ON_ONCE(!sp->spt[i]))
+ return 0;
first_pte_gpa = FNAME(get_level1_sp_gpa)(sp);
+ pte_gpa = first_pte_gpa + i * sizeof(pt_element_t);
- for (i = 0; i < SPTE_ENT_PER_PAGE; i++) {
- u64 *sptep, spte;
- struct kvm_memory_slot *slot;
- unsigned pte_access;
- pt_element_t gpte;
- gpa_t pte_gpa;
- gfn_t gfn;
+ if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte,
+ sizeof(pt_element_t)))
+ return -1;
- if (!sp->spt[i])
- continue;
+ if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte))
+ return 1;
- pte_gpa = first_pte_gpa + i * sizeof(pt_element_t);
+ gfn = gpte_to_gfn(gpte);
+ pte_access = sp->role.access;
+ pte_access &= FNAME(gpte_access)(gpte);
+ FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);
- if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte,
- sizeof(pt_element_t)))
- return -1;
-
- if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
- flush = true;
- continue;
- }
-
- gfn = gpte_to_gfn(gpte);
- pte_access = sp->role.access;
- pte_access &= FNAME(gpte_access)(gpte);
- FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);
-
- if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access))
- continue;
-
- /*
- * Drop the SPTE if the new protections would result in a RWX=0
- * SPTE or if the gfn is changing. The RWX=0 case only affects
- * EPT with execute-only support, i.e. EPT without an effective
- * "present" bit, as all other paging modes will create a
- * read-only SPTE if pte_access is zero.
- */
- if ((!pte_access && !shadow_present_mask) ||
- gfn != kvm_mmu_page_get_gfn(sp, i)) {
- drop_spte(vcpu->kvm, &sp->spt[i]);
- flush = true;
- continue;
- }
-
- /* Update the shadowed access bits in case they changed. */
- kvm_mmu_page_set_access(sp, i, pte_access);
-
- sptep = &sp->spt[i];
- spte = *sptep;
- host_writable = spte & shadow_host_writable_mask;
- slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
- make_spte(vcpu, sp, slot, pte_access, gfn,
- spte_to_pfn(spte), spte, true, false,
- host_writable, &spte);
-
- flush |= mmu_spte_update(sptep, spte);
- }
+ if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access))
+ return 0;
/*
- * Note, any flush is purely for KVM's correctness, e.g. when dropping
- * an existing SPTE or clearing W/A/D bits to ensure an mmu_notifier
- * unmap or dirty logging event doesn't fail to flush. The guest is
- * responsible for flushing the TLB to ensure any changes in protection
- * bits are recognized, i.e. until the guest flushes or page faults on
- * a relevant address, KVM is architecturally allowed to let vCPUs use
- * cached translations with the old protection bits.
+ * Drop the SPTE if the new protections would result in a RWX=0
+ * SPTE or if the gfn is changing. The RWX=0 case only affects
+ * EPT with execute-only support, i.e. EPT without an effective
+ * "present" bit, as all other paging modes will create a
+ * read-only SPTE if pte_access is zero.
*/
- return flush;
+ if ((!pte_access && !shadow_present_mask) ||
+ gfn != kvm_mmu_page_get_gfn(sp, i)) {
+ drop_spte(vcpu->kvm, &sp->spt[i]);
+ return 1;
+ }
+ /*
+ * Do nothing if the permissions are unchanged. The existing SPTE is
+	 * still valid, and prefetch_invalid_gpte() has verified that the A/D bits
+ * are set in the "new" gPTE, i.e. there is no danger of missing an A/D
+ * update due to A/D bits being set in the SPTE but not the gPTE.
+ */
+ if (kvm_mmu_page_get_access(sp, i) == pte_access)
+ return 0;
+
+ /* Update the shadowed access bits in case they changed. */
+ kvm_mmu_page_set_access(sp, i, pte_access);
+
+ sptep = &sp->spt[i];
+ spte = *sptep;
+ host_writable = spte & shadow_host_writable_mask;
+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+ make_spte(vcpu, sp, slot, pte_access, gfn,
+ spte_to_pfn(spte), spte, true, false,
+ host_writable, &spte);
+
+ return mmu_spte_update(sptep, spte);
}
#undef pt_element_t
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index c15bfca..cf2c642 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -164,7 +164,7 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
/*
* For simplicity, enforce the NX huge page mitigation even if not
* strictly necessary. KVM could ignore the mitigation if paging is
- * disabled in the guest, as the guest doesn't have an page tables to
+ * disabled in the guest, as the guest doesn't have any page tables to
* abuse. But to safely ignore the mitigation, KVM would have to
* ensure a new MMU is loaded (or all shadow pages zapped) when CR0.PG
* is toggled on, and that's a net negative for performance when TDP is
diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
index f0af385..fae5595 100644
--- a/arch/x86/kvm/mmu/tdp_iter.h
+++ b/arch/x86/kvm/mmu/tdp_iter.h
@@ -29,29 +29,49 @@ static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte)
WRITE_ONCE(*rcu_dereference(sptep), new_spte);
}
+/*
+ * SPTEs must be modified atomically if they are shadow-present, leaf
+ * SPTEs, and have volatile bits, i.e. has bits that can be set outside
+ * of mmu_lock. The Writable bit can be set by KVM's fast page fault
+ * handler, and Accessed and Dirty bits can be set by the CPU.
+ *
+ * Note, non-leaf SPTEs do have Accessed bits and those bits are
+ * technically volatile, but KVM doesn't consume the Accessed bit of
+ * non-leaf SPTEs, i.e. KVM doesn't care if it clobbers the bit. This
+ * logic needs to be reassessed if KVM were to use non-leaf Accessed
+ * bits, e.g. to skip stepping down into child SPTEs when aging SPTEs.
+ */
+static inline bool kvm_tdp_mmu_spte_need_atomic_write(u64 old_spte, int level)
+{
+ return is_shadow_present_pte(old_spte) &&
+ is_last_spte(old_spte, level) &&
+ spte_has_volatile_bits(old_spte);
+}
+
static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte,
u64 new_spte, int level)
{
- /*
- * Atomically write the SPTE if it is a shadow-present, leaf SPTE with
- * volatile bits, i.e. has bits that can be set outside of mmu_lock.
- * The Writable bit can be set by KVM's fast page fault handler, and
- * Accessed and Dirty bits can be set by the CPU.
- *
- * Note, non-leaf SPTEs do have Accessed bits and those bits are
- * technically volatile, but KVM doesn't consume the Accessed bit of
- * non-leaf SPTEs, i.e. KVM doesn't care if it clobbers the bit. This
- * logic needs to be reassessed if KVM were to use non-leaf Accessed
- * bits, e.g. to skip stepping down into child SPTEs when aging SPTEs.
- */
- if (is_shadow_present_pte(old_spte) && is_last_spte(old_spte, level) &&
- spte_has_volatile_bits(old_spte))
+ if (kvm_tdp_mmu_spte_need_atomic_write(old_spte, level))
return kvm_tdp_mmu_write_spte_atomic(sptep, new_spte);
__kvm_tdp_mmu_write_spte(sptep, new_spte);
return old_spte;
}
+static inline u64 tdp_mmu_clear_spte_bits(tdp_ptep_t sptep, u64 old_spte,
+ u64 mask, int level)
+{
+ atomic64_t *sptep_atomic;
+
+ if (kvm_tdp_mmu_spte_need_atomic_write(old_spte, level)) {
+ sptep_atomic = (atomic64_t *)rcu_dereference(sptep);
+ return (u64)atomic64_fetch_and(~mask, sptep_atomic);
+ }
+
+ __kvm_tdp_mmu_write_spte(sptep, old_spte & ~mask);
+ return old_spte;
+}
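A userspace sketch of the clear-bits helper introduced above, using C11 atomics in place of the kernel's atomic64 API: when the SPTE may be modified concurrently, the bits are cleared with an atomic fetch-and so the returned snapshot includes concurrent updates; otherwise a plain store of old & ~mask suffices. Bit values below follow x86 PTE layout; everything else is a stand-in.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t clear_bits(_Atomic uint64_t *ptep, uint64_t old, uint64_t mask,
			   bool volatile_bits)
{
	if (volatile_bits)
		/* Returns the value *before* the AND, like atomic64_fetch_and(). */
		return atomic_fetch_and(ptep, ~mask);

	atomic_store_explicit(ptep, old & ~mask, memory_order_relaxed);
	return old;
}

int main(void)
{
	_Atomic uint64_t pte = 0x60;	/* accessed (bit 5) + dirty (bit 6) */
	uint64_t old = clear_bits(&pte, 0x60, 0x20, true);

	printf("old=%#llx new=%#llx\n", (unsigned long long)old,
	       (unsigned long long)atomic_load(&pte));	/* old=0x60 new=0x40 */
	return 0;
}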
+
/*
* A TDP iterator performs a pre-order walk over a TDP paging structure.
*/
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 7c25dbf..b2fca11b 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -334,35 +334,6 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
u64 old_spte, u64 new_spte, int level,
bool shared);
-static void handle_changed_spte_acc_track(u64 old_spte, u64 new_spte, int level)
-{
- if (!is_shadow_present_pte(old_spte) || !is_last_spte(old_spte, level))
- return;
-
- if (is_accessed_spte(old_spte) &&
- (!is_shadow_present_pte(new_spte) || !is_accessed_spte(new_spte) ||
- spte_to_pfn(old_spte) != spte_to_pfn(new_spte)))
- kvm_set_pfn_accessed(spte_to_pfn(old_spte));
-}
-
-static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
- u64 old_spte, u64 new_spte, int level)
-{
- bool pfn_changed;
- struct kvm_memory_slot *slot;
-
- if (level > PG_LEVEL_4K)
- return;
-
- pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
-
- if ((!is_writable_pte(old_spte) || pfn_changed) &&
- is_writable_pte(new_spte)) {
- slot = __gfn_to_memslot(__kvm_memslots(kvm, as_id), gfn);
- mark_page_dirty_in_slot(kvm, slot, gfn);
- }
-}
-
static void tdp_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
kvm_account_pgtable_pages((void *)sp->spt, +1);
@@ -505,7 +476,7 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)
}
/**
- * __handle_changed_spte - handle bookkeeping associated with an SPTE change
+ * handle_changed_spte - handle bookkeeping associated with an SPTE change
* @kvm: kvm instance
* @as_id: the address space of the paging structure the SPTE was a part of
* @gfn: the base GFN that was mapped by the SPTE
@@ -516,12 +487,13 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)
* the MMU lock and the operation must synchronize with other
* threads that might be modifying SPTEs.
*
- * Handle bookkeeping that might result from the modification of a SPTE.
- * This function must be called for all TDP SPTE modifications.
+ * Handle bookkeeping that might result from the modification of a SPTE. Note,
+ * dirty logging updates are handled in common code, not here (see make_spte()
+ * and fast_pf_fix_direct_spte()).
*/
-static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
- u64 old_spte, u64 new_spte, int level,
- bool shared)
+static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
+ u64 old_spte, u64 new_spte, int level,
+ bool shared)
{
bool was_present = is_shadow_present_pte(old_spte);
bool is_present = is_shadow_present_pte(new_spte);
@@ -605,17 +577,10 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
if (was_present && !was_leaf &&
(is_leaf || !is_present || WARN_ON_ONCE(pfn_changed)))
handle_removed_pt(kvm, spte_to_child_pt(old_spte, level), shared);
-}
-static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
- u64 old_spte, u64 new_spte, int level,
- bool shared)
-{
- __handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level,
- shared);
- handle_changed_spte_acc_track(old_spte, new_spte, level);
- handle_changed_spte_dirty_log(kvm, as_id, gfn, old_spte,
- new_spte, level);
+ if (was_leaf && is_accessed_spte(old_spte) &&
+ (!is_present || !is_accessed_spte(new_spte) || pfn_changed))
+ kvm_set_pfn_accessed(spte_to_pfn(old_spte));
}
/*
@@ -658,9 +623,8 @@ static inline int tdp_mmu_set_spte_atomic(struct kvm *kvm,
if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte))
return -EBUSY;
- __handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
- new_spte, iter->level, true);
- handle_changed_spte_acc_track(iter->old_spte, new_spte, iter->level);
+ handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
+ new_spte, iter->level, true);
return 0;
}
@@ -696,7 +660,7 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
/*
- * __tdp_mmu_set_spte - Set a TDP MMU SPTE and handle the associated bookkeeping
+ * tdp_mmu_set_spte - Set a TDP MMU SPTE and handle the associated bookkeeping
* @kvm: KVM instance
* @as_id: Address space ID, i.e. regular vs. SMM
* @sptep: Pointer to the SPTE
@@ -704,23 +668,12 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
* @new_spte: The new value that will be set for the SPTE
* @gfn: The base GFN that was (or will be) mapped by the SPTE
* @level: The level _containing_ the SPTE (its parent PT's level)
- * @record_acc_track: Notify the MM subsystem of changes to the accessed state
- * of the page. Should be set unless handling an MMU
- * notifier for access tracking. Leaving record_acc_track
- * unset in that case prevents page accesses from being
- * double counted.
- * @record_dirty_log: Record the page as dirty in the dirty bitmap if
- * appropriate for the change being made. Should be set
- * unless performing certain dirty logging operations.
- * Leaving record_dirty_log unset in that case prevents page
- * writes from being double counted.
*
* Returns the old SPTE value, which _may_ be different than @old_spte if the
 * SPTE had volatile bits.
*/
-static u64 __tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
- u64 old_spte, u64 new_spte, gfn_t gfn, int level,
- bool record_acc_track, bool record_dirty_log)
+static u64 tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
+ u64 old_spte, u64 new_spte, gfn_t gfn, int level)
{
lockdep_assert_held_write(&kvm->mmu_lock);
@@ -735,46 +688,17 @@ static u64 __tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, new_spte, level);
- __handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level, false);
-
- if (record_acc_track)
- handle_changed_spte_acc_track(old_spte, new_spte, level);
- if (record_dirty_log)
- handle_changed_spte_dirty_log(kvm, as_id, gfn, old_spte,
- new_spte, level);
+ handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level, false);
return old_spte;
}
-static inline void _tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
- u64 new_spte, bool record_acc_track,
- bool record_dirty_log)
+static inline void tdp_mmu_iter_set_spte(struct kvm *kvm, struct tdp_iter *iter,
+ u64 new_spte)
{
WARN_ON_ONCE(iter->yielded);
-
- iter->old_spte = __tdp_mmu_set_spte(kvm, iter->as_id, iter->sptep,
- iter->old_spte, new_spte,
- iter->gfn, iter->level,
- record_acc_track, record_dirty_log);
-}
-
-static inline void tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
- u64 new_spte)
-{
- _tdp_mmu_set_spte(kvm, iter, new_spte, true, true);
-}
-
-static inline void tdp_mmu_set_spte_no_acc_track(struct kvm *kvm,
- struct tdp_iter *iter,
- u64 new_spte)
-{
- _tdp_mmu_set_spte(kvm, iter, new_spte, false, true);
-}
-
-static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm,
- struct tdp_iter *iter,
- u64 new_spte)
-{
- _tdp_mmu_set_spte(kvm, iter, new_spte, true, false);
+ iter->old_spte = tdp_mmu_set_spte(kvm, iter->as_id, iter->sptep,
+ iter->old_spte, new_spte,
+ iter->gfn, iter->level);
}
#define tdp_root_for_each_pte(_iter, _root, _start, _end) \
@@ -866,7 +790,7 @@ static void __tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
continue;
if (!shared)
- tdp_mmu_set_spte(kvm, &iter, 0);
+ tdp_mmu_iter_set_spte(kvm, &iter, 0);
else if (tdp_mmu_set_spte_atomic(kvm, &iter, 0))
goto retry;
}
@@ -923,8 +847,8 @@ bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
if (WARN_ON_ONCE(!is_shadow_present_pte(old_spte)))
return false;
- __tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte, 0,
- sp->gfn, sp->role.level + 1, true, true);
+ tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte, 0,
+ sp->gfn, sp->role.level + 1);
return true;
}
@@ -958,7 +882,7 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
!is_last_spte(iter.old_spte, iter.level))
continue;
- tdp_mmu_set_spte(kvm, &iter, 0);
+ tdp_mmu_iter_set_spte(kvm, &iter, 0);
flush = true;
}
@@ -1128,7 +1052,7 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
if (ret)
return ret;
} else {
- tdp_mmu_set_spte(kvm, iter, spte);
+ tdp_mmu_iter_set_spte(kvm, iter, spte);
}
tdp_account_mmu_page(kvm, sp);
@@ -1262,33 +1186,42 @@ static __always_inline bool kvm_tdp_mmu_handle_gfn(struct kvm *kvm,
/*
 * Mark the SPTEs in the range of GFNs [start, end) unaccessed and return non-zero
* if any of the GFNs in the range have been accessed.
+ *
+ * No need to mark the corresponding PFN as accessed as this call is coming
+ * from the clear_young() or clear_flush_young() notifier, which uses the
+ * return value to determine if the page has been accessed.
*/
static bool age_gfn_range(struct kvm *kvm, struct tdp_iter *iter,
struct kvm_gfn_range *range)
{
- u64 new_spte = 0;
+ u64 new_spte;
/* If we have a non-accessed entry we don't need to change the pte. */
if (!is_accessed_spte(iter->old_spte))
return false;
- new_spte = iter->old_spte;
-
- if (spte_ad_enabled(new_spte)) {
- new_spte &= ~shadow_accessed_mask;
+ if (spte_ad_enabled(iter->old_spte)) {
+ iter->old_spte = tdp_mmu_clear_spte_bits(iter->sptep,
+ iter->old_spte,
+ shadow_accessed_mask,
+ iter->level);
+ new_spte = iter->old_spte & ~shadow_accessed_mask;
} else {
/*
* Capture the dirty status of the page, so that it doesn't get
* lost when the SPTE is marked for access tracking.
*/
- if (is_writable_pte(new_spte))
- kvm_set_pfn_dirty(spte_to_pfn(new_spte));
+ if (is_writable_pte(iter->old_spte))
+ kvm_set_pfn_dirty(spte_to_pfn(iter->old_spte));
- new_spte = mark_spte_for_access_track(new_spte);
+ new_spte = mark_spte_for_access_track(iter->old_spte);
+ iter->old_spte = kvm_tdp_mmu_write_spte(iter->sptep,
+ iter->old_spte, new_spte,
+ iter->level);
}
- tdp_mmu_set_spte_no_acc_track(kvm, iter, new_spte);
-
+ trace_kvm_tdp_mmu_spte_changed(iter->as_id, iter->gfn, iter->level,
+ iter->old_spte, new_spte);
return true;
}
@@ -1324,15 +1257,15 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
* Note, when changing a read-only SPTE, it's not strictly necessary to
* zero the SPTE before setting the new PFN, but doing so preserves the
 * invariant that the PFN of a present leaf SPTE can never change.
- * See __handle_changed_spte().
+ * See handle_changed_spte().
*/
- tdp_mmu_set_spte(kvm, iter, 0);
+ tdp_mmu_iter_set_spte(kvm, iter, 0);
if (!pte_write(range->pte)) {
new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte,
pte_pfn(range->pte));
- tdp_mmu_set_spte(kvm, iter, new_spte);
+ tdp_mmu_iter_set_spte(kvm, iter, new_spte);
}
return true;
@@ -1349,7 +1282,7 @@ bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
/*
* No need to handle the remote TLB flush under RCU protection, the
* target SPTE _must_ be a leaf SPTE, i.e. cannot result in freeing a
- * shadow page. See the WARN on pfn_changed in __handle_changed_spte().
+ * shadow page. See the WARN on pfn_changed in handle_changed_spte().
*/
return kvm_tdp_mmu_handle_gfn(kvm, range, set_spte_gfn);
}
@@ -1607,8 +1540,8 @@ void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm,
static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
gfn_t start, gfn_t end)
{
+ u64 dbit = kvm_ad_enabled() ? shadow_dirty_mask : PT_WRITABLE_MASK;
struct tdp_iter iter;
- u64 new_spte;
bool spte_set = false;
rcu_read_lock();
@@ -1621,19 +1554,13 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
if (!is_shadow_present_pte(iter.old_spte))
continue;
- if (spte_ad_need_write_protect(iter.old_spte)) {
- if (is_writable_pte(iter.old_spte))
- new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
- else
- continue;
- } else {
- if (iter.old_spte & shadow_dirty_mask)
- new_spte = iter.old_spte & ~shadow_dirty_mask;
- else
- continue;
- }
+ MMU_WARN_ON(kvm_ad_enabled() &&
+ spte_ad_need_write_protect(iter.old_spte));
- if (tdp_mmu_set_spte_atomic(kvm, &iter, new_spte))
+ if (!(iter.old_spte & dbit))
+ continue;
+
+ if (tdp_mmu_set_spte_atomic(kvm, &iter, iter.old_spte & ~dbit))
goto retry;
spte_set = true;
@@ -1675,8 +1602,9 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
gfn_t gfn, unsigned long mask, bool wrprot)
{
+ u64 dbit = (wrprot || !kvm_ad_enabled()) ? PT_WRITABLE_MASK :
+ shadow_dirty_mask;
struct tdp_iter iter;
- u64 new_spte;
rcu_read_lock();
@@ -1685,25 +1613,26 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
if (!mask)
break;
+ MMU_WARN_ON(kvm_ad_enabled() &&
+ spte_ad_need_write_protect(iter.old_spte));
+
if (iter.level > PG_LEVEL_4K ||
!(mask & (1UL << (iter.gfn - gfn))))
continue;
mask &= ~(1UL << (iter.gfn - gfn));
- if (wrprot || spte_ad_need_write_protect(iter.old_spte)) {
- if (is_writable_pte(iter.old_spte))
- new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
- else
- continue;
- } else {
- if (iter.old_spte & shadow_dirty_mask)
- new_spte = iter.old_spte & ~shadow_dirty_mask;
- else
- continue;
- }
+ if (!(iter.old_spte & dbit))
+ continue;
- tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
+ iter.old_spte = tdp_mmu_clear_spte_bits(iter.sptep,
+ iter.old_spte, dbit,
+ iter.level);
+
+ trace_kvm_tdp_mmu_spte_changed(iter.as_id, iter.gfn, iter.level,
+ iter.old_spte,
+ iter.old_spte & ~dbit);
+ kvm_set_pfn_dirty(spte_to_pfn(iter.old_spte));
}
rcu_read_unlock();
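The dirty-clearing rework above boils down to picking the bit to clear once and then running one uniform loop. A minimal userspace sketch under that assumption (x86 PTE bit positions, everything else made up):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PTE_WRITABLE	(1ull << 1)
#define PTE_DIRTY	(1ull << 6)

static void clear_dirty(uint64_t *ptes, int n, bool wrprot, bool ad_enabled)
{
	/* Pick the bit to clear once, outside the loop. */
	uint64_t dbit = (wrprot || !ad_enabled) ? PTE_WRITABLE : PTE_DIRTY;
	int i;

	for (i = 0; i < n; i++) {
		if (!(ptes[i] & dbit))
			continue;
		ptes[i] &= ~dbit;
		printf("pte[%d] -> %#llx\n", i, (unsigned long long)ptes[i]);
	}
}

int main(void)
{
	uint64_t ptes[] = { PTE_WRITABLE | PTE_DIRTY, PTE_WRITABLE, PTE_DIRTY };

	clear_dirty(ptes, 3, false, true);	/* clears only the dirty bits */
	return 0;
}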
@@ -1821,7 +1750,7 @@ static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root,
if (new_spte == iter.old_spte)
break;
- tdp_mmu_set_spte(kvm, &iter, new_spte);
+ tdp_mmu_iter_set_spte(kvm, &iter, new_spte);
spte_set = true;
}
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 612e6c7..1690d41 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -93,7 +93,7 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
#undef __KVM_X86_PMU_OP
}
-static inline bool pmc_is_enabled(struct kvm_pmc *pmc)
+static inline bool pmc_is_globally_enabled(struct kvm_pmc *pmc)
{
return static_call(kvm_x86_pmu_pmc_is_enabled)(pmc);
}
@@ -400,6 +400,12 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
return is_fixed_event_allowed(filter, pmc->idx);
}
+static bool pmc_event_is_allowed(struct kvm_pmc *pmc)
+{
+ return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) &&
+ check_pmu_event_filter(pmc);
+}
+
static void reprogram_counter(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -409,10 +415,7 @@ static void reprogram_counter(struct kvm_pmc *pmc)
pmc_pause_counter(pmc);
- if (!pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc))
- goto reprogram_complete;
-
- if (!check_pmu_event_filter(pmc))
+ if (!pmc_event_is_allowed(pmc))
goto reprogram_complete;
if (pmc->counter < pmc->prev_counter)
@@ -540,9 +543,9 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
if (!pmc)
return 1;
- if (!(kvm_read_cr4(vcpu) & X86_CR4_PCE) &&
+ if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_PCE) &&
(static_call(kvm_x86_get_cpl)(vcpu) != 0) &&
- (kvm_read_cr0(vcpu) & X86_CR0_PE))
+ kvm_is_cr0_bit_set(vcpu, X86_CR0_PE))
return 1;
*data = pmc_read_counter(pmc) & mask;
@@ -589,6 +592,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
*/
void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
{
+ if (KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm))
+ return;
+
+ bitmap_zero(vcpu_to_pmu(vcpu)->all_valid_pmc_idx, X86_PMC_IDX_MAX);
static_call(kvm_x86_pmu_refresh)(vcpu);
}
@@ -646,7 +653,7 @@ static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
{
pmc->prev_counter = pmc->counter;
pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);
- kvm_pmu_request_counter_reprogam(pmc);
+ kvm_pmu_request_counter_reprogram(pmc);
}
static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
@@ -684,7 +691,7 @@ void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
- if (!pmc || !pmc_is_enabled(pmc) || !pmc_speculative_in_use(pmc))
+ if (!pmc || !pmc_event_is_allowed(pmc))
continue;
/* Ignore checks for edge detect, pin control, invert and CMASK bits */
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index be62c16..5c7bbf0 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -195,7 +195,7 @@ static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
KVM_PMC_MAX_FIXED);
}
-static inline void kvm_pmu_request_counter_reprogam(struct kvm_pmc *pmc)
+static inline void kvm_pmu_request_counter_reprogram(struct kvm_pmc *pmc)
{
set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 05d3894..96936dd 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -139,13 +139,18 @@ void recalc_intercepts(struct vcpu_svm *svm)
if (g->int_ctl & V_INTR_MASKING_MASK) {
/*
- * Once running L2 with HF_VINTR_MASK, EFLAGS.IF and CR8
- * does not affect any interrupt we may want to inject;
- * therefore, writes to CR8 are irrelevant to L0, as are
- * interrupt window vmexits.
+ * If L2 is active and V_INTR_MASKING is enabled in vmcb12,
+ * disable intercept of CR8 writes as L2's CR8 does not affect
+ * any interrupt KVM may want to inject.
+ *
+ * Similarly, disable intercept of virtual interrupts (used to
+ * detect interrupt windows) if the saved RFLAGS.IF is '0', as
+ * the effective RFLAGS.IF for L1 interrupts will never be set
+ * while L2 is running (L2's RFLAGS.IF doesn't affect L1 IRQs).
*/
vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE);
- vmcb_clr_intercept(c, INTERCEPT_VINTR);
+ if (!(svm->vmcb01.ptr->save.rflags & X86_EFLAGS_IF))
+ vmcb_clr_intercept(c, INTERCEPT_VINTR);
}
/*
@@ -276,6 +281,11 @@ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
if (CC(!nested_svm_check_tlb_ctl(vcpu, control->tlb_ctl)))
return false;
+ if (CC((control->int_ctl & V_NMI_ENABLE_MASK) &&
+ !vmcb12_is_intercept(control, INTERCEPT_NMI))) {
+ return false;
+ }
+
return true;
}
@@ -416,22 +426,24 @@ void nested_sync_control_from_vmcb02(struct vcpu_svm *svm)
/* Only a few fields of int_ctl are written by the processor. */
mask = V_IRQ_MASK | V_TPR_MASK;
- if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) &&
- svm_is_intercept(svm, INTERCEPT_VINTR)) {
- /*
- * In order to request an interrupt window, L0 is usurping
- * svm->vmcb->control.int_ctl and possibly setting V_IRQ
- * even if it was clear in L1's VMCB. Restoring it would be
- * wrong. However, in this case V_IRQ will remain true until
- * interrupt_window_interception calls svm_clear_vintr and
- * restores int_ctl. We can just leave it aside.
- */
+ /*
+ * Don't sync vmcb02 V_IRQ back to vmcb12 if KVM (L0) is intercepting
+ * virtual interrupts in order to request an interrupt window, as KVM
+ * has usurped vmcb02's int_ctl. If an interrupt window opens before
+ * the next VM-Exit, svm_clear_vintr() will restore vmcb12's int_ctl.
+ * If no window opens, V_IRQ will be correctly preserved in vmcb12's
+ * int_ctl (because it was never recognized while L2 was running).
+ */
+ if (svm_is_intercept(svm, INTERCEPT_VINTR) &&
+ !test_bit(INTERCEPT_VINTR, (unsigned long *)svm->nested.ctl.intercepts))
mask &= ~V_IRQ_MASK;
- }
if (nested_vgif_enabled(svm))
mask |= V_GIF_MASK;
+ if (nested_vnmi_enabled(svm))
+ mask |= V_NMI_BLOCKING_MASK | V_NMI_PENDING_MASK;
+
svm->nested.ctl.int_ctl &= ~mask;
svm->nested.ctl.int_ctl |= svm->vmcb->control.int_ctl & mask;
}
@@ -651,6 +663,17 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
else
int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
+ if (vnmi) {
+ if (vmcb01->control.int_ctl & V_NMI_PENDING_MASK) {
+ svm->vcpu.arch.nmi_pending++;
+ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+ }
+ if (nested_vnmi_enabled(svm))
+ int_ctl_vmcb12_bits |= (V_NMI_PENDING_MASK |
+ V_NMI_ENABLE_MASK |
+ V_NMI_BLOCKING_MASK);
+ }
+
/* Copied from vmcb01. msrpm_base can be overwritten later. */
vmcb02->control.nested_ctl = vmcb01->control.nested_ctl;
vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa;
@@ -1021,6 +1044,28 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
svm_switch_vmcb(svm, &svm->vmcb01);
+ /*
+ * Rules for synchronizing int_ctl bits from vmcb02 to vmcb01:
+ *
+ * V_IRQ, V_IRQ_VECTOR, V_INTR_PRIO_MASK, V_IGN_TPR: If L1 doesn't
+ * intercept interrupts, then KVM will use vmcb02's V_IRQ (and related
+ * flags) to detect interrupt windows for L1 IRQs (even if L1 uses
+ * virtual interrupt masking). Raise KVM_REQ_EVENT to ensure that
+ * KVM re-requests an interrupt window if necessary, which implicitly
+	 * copies these bits from vmcb02 to vmcb01.
+ *
+ * V_TPR: If L1 doesn't use virtual interrupt masking, then L1's vTPR
+ * is stored in vmcb02, but its value doesn't need to be copied from/to
+ * vmcb01 because it is copied from/to the virtual APIC's TPR register
+ * on each VM entry/exit.
+ *
+ * V_GIF: If nested vGIF is not used, KVM uses vmcb02's V_GIF for L1's
+ * V_GIF. However, GIF is architecturally clear on each VM exit, thus
+ * there is no need to copy V_GIF from vmcb02 to vmcb01.
+ */
+ if (!nested_exit_on_intr(svm))
+ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+
if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
svm_copy_lbrs(vmcb12, vmcb02);
svm_update_lbrv(vcpu);
@@ -1029,6 +1074,20 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
svm_update_lbrv(vcpu);
}
+ if (vnmi) {
+ if (vmcb02->control.int_ctl & V_NMI_BLOCKING_MASK)
+ vmcb01->control.int_ctl |= V_NMI_BLOCKING_MASK;
+ else
+ vmcb01->control.int_ctl &= ~V_NMI_BLOCKING_MASK;
+
+ if (vcpu->arch.nmi_pending) {
+ vcpu->arch.nmi_pending--;
+ vmcb01->control.int_ctl |= V_NMI_PENDING_MASK;
+ } else {
+ vmcb01->control.int_ctl &= ~V_NMI_PENDING_MASK;
+ }
+ }
+
/*
* On vmexit the GIF is set to false and
* no event can be injected in L1.
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index cc77a06..5fa939e 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -161,7 +161,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
data &= ~pmu->reserved_bits;
if (data != pmc->eventsel) {
pmc->eventsel = data;
- kvm_pmu_request_counter_reprogam(pmc);
+ kvm_pmu_request_counter_reprogram(pmc);
}
return 0;
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 70183d2..eb308c9 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -95,6 +95,7 @@ static const struct svm_direct_access_msrs {
#endif
{ .index = MSR_IA32_SPEC_CTRL, .always = false },
{ .index = MSR_IA32_PRED_CMD, .always = false },
+ { .index = MSR_IA32_FLUSH_CMD, .always = false },
{ .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
{ .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
{ .index = MSR_IA32_LASTINTFROMIP, .always = false },
@@ -230,6 +231,8 @@ module_param(dump_invalid_vmcb, bool, 0644);
bool intercept_smi = true;
module_param(intercept_smi, bool, 0444);
+bool vnmi = true;
+module_param(vnmi, bool, 0444);
static bool svm_gp_erratum_intercept = true;
@@ -1311,6 +1314,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
if (kvm_vcpu_apicv_active(vcpu))
avic_init_vmcb(svm, vmcb);
+ if (vnmi)
+ svm->vmcb->control.int_ctl |= V_NMI_ENABLE_MASK;
+
if (vgif) {
svm_clr_intercept(svm, INTERCEPT_STGI);
svm_clr_intercept(svm, INTERCEPT_CLGI);
@@ -1584,6 +1590,16 @@ static void svm_set_vintr(struct vcpu_svm *svm)
svm_set_intercept(svm, INTERCEPT_VINTR);
/*
+ * Recalculating intercepts may have cleared the VINTR intercept. If
+ * V_INTR_MASKING is enabled in vmcb12, then the effective RFLAGS.IF
+ * for L1 physical interrupts is L1's RFLAGS.IF at the time of VMRUN.
+ * Requesting an interrupt window if save.RFLAGS.IF=0 is pointless as
+ * interrupts will never be unblocked while L2 is running.
+ */
+ if (!svm_is_intercept(svm, INTERCEPT_VINTR))
+ return;
+
+ /*
* This is just a dummy VINTR to actually cause a vmexit to happen.
* Actual injection of virtual interrupts happens through EVENTINJ.
*/
@@ -2480,16 +2496,29 @@ static int task_switch_interception(struct kvm_vcpu *vcpu)
has_error_code, error_code);
}
+static void svm_clr_iret_intercept(struct vcpu_svm *svm)
+{
+ if (!sev_es_guest(svm->vcpu.kvm))
+ svm_clr_intercept(svm, INTERCEPT_IRET);
+}
+
+static void svm_set_iret_intercept(struct vcpu_svm *svm)
+{
+ if (!sev_es_guest(svm->vcpu.kvm))
+ svm_set_intercept(svm, INTERCEPT_IRET);
+}
+
static int iret_interception(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
++vcpu->stat.nmi_window_exits;
svm->awaiting_iret_completion = true;
- if (!sev_es_guest(vcpu->kvm)) {
- svm_clr_intercept(svm, INTERCEPT_IRET);
+
+ svm_clr_iret_intercept(svm);
+ if (!sev_es_guest(vcpu->kvm))
svm->nmi_iret_rip = kvm_rip_read(vcpu);
- }
+
kvm_make_request(KVM_REQ_EVENT, vcpu);
return 1;
}
@@ -2869,32 +2898,10 @@ static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
return 0;
}
-static int svm_set_msr_ia32_cmd(struct kvm_vcpu *vcpu, struct msr_data *msr,
- bool guest_has_feat, u64 cmd,
- int x86_feature_bit)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
-
- if (!msr->host_initiated && !guest_has_feat)
- return 1;
-
- if (!(msr->data & ~cmd))
- return 1;
- if (!boot_cpu_has(x86_feature_bit))
- return 1;
- if (!msr->data)
- return 0;
-
- wrmsrl(msr->index, cmd);
- set_msr_interception(vcpu, svm->msrpm, msr->index, 0, 1);
-
- return 0;
-}
-
static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
struct vcpu_svm *svm = to_svm(vcpu);
- int r;
+ int ret = 0;
u32 ecx = msr->index;
u64 data = msr->data;
@@ -2964,16 +2971,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
*/
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
break;
- case MSR_IA32_PRED_CMD:
- r = svm_set_msr_ia32_cmd(vcpu, msr,
- guest_has_pred_cmd_msr(vcpu),
- PRED_CMD_IBPB, X86_FEATURE_IBPB);
- break;
- case MSR_IA32_FLUSH_CMD:
- r = svm_set_msr_ia32_cmd(vcpu, msr,
- guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D),
- L1D_FLUSH, X86_FEATURE_FLUSH_L1D);
- break;
case MSR_AMD64_VIRT_SPEC_CTRL:
if (!msr->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
@@ -3026,10 +3023,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
* guest via direct_access_msrs, and switch it via user return.
*/
preempt_disable();
- r = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
+ ret = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
preempt_enable();
- if (r)
- return 1;
+ if (ret)
+ break;
svm->tsc_aux = data;
break;
@@ -3087,7 +3084,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
default:
return kvm_set_msr_common(vcpu, msr);
}
- return 0;
+ return ret;
}
static int msr_interception(struct kvm_vcpu *vcpu)
@@ -3498,11 +3495,43 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu)
return;
svm->nmi_masked = true;
- if (!sev_es_guest(vcpu->kvm))
- svm_set_intercept(svm, INTERCEPT_IRET);
+ svm_set_iret_intercept(svm);
++vcpu->stat.nmi_injections;
}
+static bool svm_is_vnmi_pending(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (!is_vnmi_enabled(svm))
+ return false;
+
+ return !!(svm->vmcb->control.int_ctl & V_NMI_BLOCKING_MASK);
+}
+
+static bool svm_set_vnmi_pending(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (!is_vnmi_enabled(svm))
+ return false;
+
+ if (svm->vmcb->control.int_ctl & V_NMI_PENDING_MASK)
+ return false;
+
+ svm->vmcb->control.int_ctl |= V_NMI_PENDING_MASK;
+ vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
+
+ /*
+ * Because the pending NMI is serviced by hardware, KVM can't know when
+ * the NMI is "injected", but for all intents and purposes, passing the
+ * NMI off to hardware counts as injection.
+ */
+ ++vcpu->stat.nmi_injections;
+
+ return true;
+}
+
static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3598,6 +3627,35 @@ static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
}
+static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (is_vnmi_enabled(svm))
+ return svm->vmcb->control.int_ctl & V_NMI_BLOCKING_MASK;
+ else
+ return svm->nmi_masked;
+}
+
+static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (is_vnmi_enabled(svm)) {
+ if (masked)
+ svm->vmcb->control.int_ctl |= V_NMI_BLOCKING_MASK;
+ else
+ svm->vmcb->control.int_ctl &= ~V_NMI_BLOCKING_MASK;
+
+ } else {
+ svm->nmi_masked = masked;
+ if (masked)
+ svm_set_iret_intercept(svm);
+ else
+ svm_clr_iret_intercept(svm);
+ }
+}
+
bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3609,8 +3667,10 @@ bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
if (is_guest_mode(vcpu) && nested_exit_on_nmi(svm))
return false;
- return (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
- svm->nmi_masked;
+ if (svm_get_nmi_mask(vcpu))
+ return true;
+
+ return vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK;
}
static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
@@ -3628,26 +3688,6 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
return 1;
}
-static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
-{
- return to_svm(vcpu)->nmi_masked;
-}
-
-static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
-
- if (masked) {
- svm->nmi_masked = true;
- if (!sev_es_guest(vcpu->kvm))
- svm_set_intercept(svm, INTERCEPT_IRET);
- } else {
- svm->nmi_masked = false;
- if (!sev_es_guest(vcpu->kvm))
- svm_clr_intercept(svm, INTERCEPT_IRET);
- }
-}
-
bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3728,7 +3768,16 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (svm->nmi_masked && !svm->awaiting_iret_completion)
+ /*
+ * KVM should never request an NMI window when vNMI is enabled, as KVM
+ * allows at most one to-be-injected NMI and one pending NMI, i.e. if
+ * two NMIs arrive simultaneously, KVM will inject one and set
+ * V_NMI_PENDING for the other. WARN, but continue with the standard
+ * single-step approach to try and salvage the pending NMI.
+ */
+ WARN_ON_ONCE(is_vnmi_enabled(svm));
+
+ if (svm_get_nmi_mask(vcpu) && !svm->awaiting_iret_completion)
return; /* IRET will cause a vm exit */
if (!gif_set(svm)) {
@@ -4124,7 +4173,7 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
{
switch (index) {
case MSR_IA32_MCG_EXT_CTL:
- case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+ case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
return false;
case MSR_IA32_SMBASE:
if (!IS_ENABLED(CONFIG_KVM_SMM))
@@ -4166,8 +4215,18 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
svm->vgif_enabled = vgif && guest_cpuid_has(vcpu, X86_FEATURE_VGIF);
+ svm->vnmi_enabled = vnmi && guest_cpuid_has(vcpu, X86_FEATURE_VNMI);
+
svm_recalc_instruction_intercepts(vcpu, svm);
+ if (boot_cpu_has(X86_FEATURE_IBPB))
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_PRED_CMD, 0,
+ !!guest_has_pred_cmd_msr(vcpu));
+
+ if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_FLUSH_CMD, 0,
+ !!guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D));
+
/* For sev guests, the memory encryption bit is not reserved in CR3. */
if (sev_guest(vcpu->kvm)) {
best = kvm_find_cpuid_entry(vcpu, 0x8000001F);
@@ -4545,7 +4604,6 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
void *insn, int insn_len)
{
bool smep, smap, is_user;
- unsigned long cr4;
u64 error_code;
/* Emulation is always possible when KVM has access to all guest state. */
@@ -4637,9 +4695,8 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
if (error_code & (PFERR_GUEST_PAGE_MASK | PFERR_FETCH_MASK))
goto resume_guest;
- cr4 = kvm_read_cr4(vcpu);
- smep = cr4 & X86_CR4_SMEP;
- smap = cr4 & X86_CR4_SMAP;
+ smep = kvm_is_cr4_bit_set(vcpu, X86_CR4_SMEP);
+ smap = kvm_is_cr4_bit_set(vcpu, X86_CR4_SMAP);
is_user = svm_get_cpl(vcpu) == 3;
if (smap && (!smep || is_user)) {
pr_err_ratelimited("SEV Guest triggered AMD Erratum 1096\n");
@@ -4777,6 +4834,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.patch_hypercall = svm_patch_hypercall,
.inject_irq = svm_inject_irq,
.inject_nmi = svm_inject_nmi,
+ .is_vnmi_pending = svm_is_vnmi_pending,
+ .set_vnmi_pending = svm_set_vnmi_pending,
.inject_exception = svm_inject_exception,
.cancel_injection = svm_cancel_injection,
.interrupt_allowed = svm_interrupt_allowed,
@@ -4919,6 +4978,9 @@ static __init void svm_set_cpu_caps(void)
if (vgif)
kvm_cpu_cap_set(X86_FEATURE_VGIF);
+ if (vnmi)
+ kvm_cpu_cap_set(X86_FEATURE_VNMI);
+
/* Nested VM can receive #VMEXIT instead of triggering #GP */
kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
}
@@ -5070,6 +5132,16 @@ static __init int svm_hardware_setup(void)
pr_info("Virtual GIF supported\n");
}
+ vnmi = vgif && vnmi && boot_cpu_has(X86_FEATURE_VNMI);
+ if (vnmi)
+ pr_info("Virtual NMI enabled\n");
+
+ if (!vnmi) {
+ svm_x86_ops.is_vnmi_pending = NULL;
+ svm_x86_ops.set_vnmi_pending = NULL;
+ }
+
+
if (lbrv) {
if (!boot_cpu_has(X86_FEATURE_LBRV))
lbrv = false;
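[Editor's note: a toy model of the vNMI bookkeeping introduced above, assuming placeholder bit positions and a simplified vCPU structure; it only illustrates the "one NMI in the hardware slot, one kept in the software queue" idea, not the real svm code.]

/* Toy model of hardware-assisted NMI (vNMI) bookkeeping; illustrative only. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VNMI_PENDING  (1u << 0)   /* placeholder, not the real encoding */
#define VNMI_BLOCKING (1u << 1)   /* placeholder, not the real encoding */

struct toy_vcpu {
	uint32_t int_ctl;      /* models vmcb->control.int_ctl            */
	unsigned nmi_pending;  /* software queue of not-yet-injected NMIs */
};

/* Hand one queued NMI to "hardware" if no vNMI is already pending. */
static bool set_vnmi_pending(struct toy_vcpu *v)
{
	if (v->int_ctl & VNMI_PENDING)
		return false;          /* hardware slot already in use */
	v->int_ctl |= VNMI_PENDING;
	return true;
}

int main(void)
{
	struct toy_vcpu v = { .nmi_pending = 2 };

	/* One NMI goes to the hardware slot, the second stays queued. */
	if (v.nmi_pending && set_vnmi_pending(&v))
		v.nmi_pending--;

	printf("int_ctl=%#x, still queued=%u\n", v.int_ctl, v.nmi_pending);
	return 0;
}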
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 8398099..f44751d 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -36,6 +36,7 @@ extern bool npt_enabled;
extern int vgif;
extern bool intercept_smi;
extern bool x2avic_enabled;
+extern bool vnmi;
/*
* Clean bits in VMCB.
@@ -265,6 +266,7 @@ struct vcpu_svm {
bool pause_filter_enabled : 1;
bool pause_threshold_enabled : 1;
bool vgif_enabled : 1;
+ bool vnmi_enabled : 1;
u32 ldr_reg;
u32 dfr_reg;
@@ -539,6 +541,12 @@ static inline bool nested_npt_enabled(struct vcpu_svm *svm)
return svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE;
}
+static inline bool nested_vnmi_enabled(struct vcpu_svm *svm)
+{
+ return svm->vnmi_enabled &&
+ (svm->nested.ctl.int_ctl & V_NMI_ENABLE_MASK);
+}
+
static inline bool is_x2apic_msrpm_offset(u32 offset)
{
/* 4 msrs per u8, and 4 u8 in u32 */
@@ -548,6 +556,27 @@ static inline bool is_x2apic_msrpm_offset(u32 offset)
(msr < (APIC_BASE_MSR + 0x100));
}
+static inline struct vmcb *get_vnmi_vmcb_l1(struct vcpu_svm *svm)
+{
+ if (!vnmi)
+ return NULL;
+
+ if (is_guest_mode(&svm->vcpu))
+ return NULL;
+ else
+ return svm->vmcb01.ptr;
+}
+
+static inline bool is_vnmi_enabled(struct vcpu_svm *svm)
+{
+ struct vmcb *vmcb = get_vnmi_vmcb_l1(svm);
+
+ if (vmcb)
+ return !!(vmcb->control.int_ctl & V_NMI_ENABLE_MASK);
+ else
+ return false;
+}
+
/* svm.c */
#define MSR_INVALID 0xffffffffU
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
index cff838f..8230010 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.h
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -35,9 +35,8 @@ static inline __init void svm_hv_hardware_setup(void)
if (npt_enabled &&
ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB) {
pr_info(KBUILD_MODNAME ": Hyper-V enlightened NPT TLB flush enabled\n");
- svm_x86_ops.tlb_remote_flush = hv_remote_flush_tlb;
- svm_x86_ops.tlb_remote_flush_with_range =
- hv_remote_flush_tlb_with_range;
+ svm_x86_ops.flush_remote_tlbs = hv_flush_remote_tlbs;
+ svm_x86_ops.flush_remote_tlbs_range = hv_flush_remote_tlbs_range;
}
if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) {
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index dab529c..96ede74 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -358,6 +358,7 @@ static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp)
static void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp,
gpa_t addr)
{
+ unsigned long roots = 0;
uint i;
struct kvm_mmu_root_info *cached_root;
@@ -368,8 +369,10 @@ static void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp,
if (nested_ept_root_matches(cached_root->hpa, cached_root->pgd,
eptp))
- vcpu->arch.mmu->invlpg(vcpu, addr, cached_root->hpa);
+ roots |= KVM_MMU_ROOT_PREVIOUS(i);
}
+ if (roots)
+ kvm_mmu_invalidate_addr(vcpu, vcpu->arch.mmu, addr, roots);
}
static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
@@ -4481,7 +4484,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
* CR0_GUEST_HOST_MASK is already set in the original vmcs01
* (KVM doesn't change it);
*/
- vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
+ vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
vmx_set_cr0(vcpu, vmcs12->host_cr0);
/* Same as above - no reason to call set_cr4_guest_host_mask(). */
@@ -4632,7 +4635,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
*/
vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));
- vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
+ vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));
vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
@@ -5154,7 +5157,7 @@ static int handle_vmxon(struct kvm_vcpu *vcpu)
* does force CR0.PE=1, but only to also force VM86 in order to emulate
* Real Mode, and so there's no need to check CR0.PE manually.
*/
- if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
+ if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_VMXE)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index e8a3be0..741efe2 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -57,7 +57,7 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i);
__set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
- kvm_pmu_request_counter_reprogam(pmc);
+ kvm_pmu_request_counter_reprogram(pmc);
}
}
@@ -76,13 +76,13 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
static void reprogram_counters(struct kvm_pmu *pmu, u64 diff)
{
int bit;
- struct kvm_pmc *pmc;
- for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) {
- pmc = intel_pmc_idx_to_pmc(pmu, bit);
- if (pmc)
- kvm_pmu_request_counter_reprogam(pmc);
- }
+ if (!diff)
+ return;
+
+ for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
+ set_bit(bit, pmu->reprogram_pmi);
+ kvm_make_request(KVM_REQ_PMU, pmu_to_vcpu(pmu));
}
static bool intel_hw_event_available(struct kvm_pmc *pmc)
@@ -351,45 +351,47 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr) {
case MSR_CORE_PERF_FIXED_CTR_CTRL:
msr_info->data = pmu->fixed_ctr_ctrl;
- return 0;
+ break;
case MSR_CORE_PERF_GLOBAL_STATUS:
msr_info->data = pmu->global_status;
- return 0;
+ break;
case MSR_CORE_PERF_GLOBAL_CTRL:
msr_info->data = pmu->global_ctrl;
- return 0;
+ break;
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
msr_info->data = 0;
- return 0;
+ break;
case MSR_IA32_PEBS_ENABLE:
msr_info->data = pmu->pebs_enable;
- return 0;
+ break;
case MSR_IA32_DS_AREA:
msr_info->data = pmu->ds_area;
- return 0;
+ break;
case MSR_PEBS_DATA_CFG:
msr_info->data = pmu->pebs_data_cfg;
- return 0;
+ break;
default:
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
(pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
u64 val = pmc_read_counter(pmc);
msr_info->data =
val & pmu->counter_bitmask[KVM_PMC_GP];
- return 0;
+ break;
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
u64 val = pmc_read_counter(pmc);
msr_info->data =
val & pmu->counter_bitmask[KVM_PMC_FIXED];
- return 0;
+ break;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
msr_info->data = pmc->eventsel;
- return 0;
- } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true))
- return 0;
+ break;
+ } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true)) {
+ break;
+ }
+ return 1;
}
- return 1;
+ return 0;
}
static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@@ -402,44 +404,43 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr) {
case MSR_CORE_PERF_FIXED_CTR_CTRL:
- if (pmu->fixed_ctr_ctrl == data)
- return 0;
- if (!(data & pmu->fixed_ctr_ctrl_mask)) {
+ if (data & pmu->fixed_ctr_ctrl_mask)
+ return 1;
+
+ if (pmu->fixed_ctr_ctrl != data)
reprogram_fixed_counters(pmu, data);
- return 0;
- }
break;
case MSR_CORE_PERF_GLOBAL_STATUS:
- if (msr_info->host_initiated) {
- pmu->global_status = data;
- return 0;
- }
- break; /* RO MSR */
+ if (!msr_info->host_initiated)
+ return 1; /* RO MSR */
+
+ pmu->global_status = data;
+ break;
case MSR_CORE_PERF_GLOBAL_CTRL:
- if (pmu->global_ctrl == data)
- return 0;
- if (kvm_valid_perf_global_ctrl(pmu, data)) {
+ if (!kvm_valid_perf_global_ctrl(pmu, data))
+ return 1;
+
+ if (pmu->global_ctrl != data) {
diff = pmu->global_ctrl ^ data;
pmu->global_ctrl = data;
reprogram_counters(pmu, diff);
- return 0;
}
break;
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- if (!(data & pmu->global_ovf_ctrl_mask)) {
- if (!msr_info->host_initiated)
- pmu->global_status &= ~data;
- return 0;
- }
+ if (data & pmu->global_ovf_ctrl_mask)
+ return 1;
+
+ if (!msr_info->host_initiated)
+ pmu->global_status &= ~data;
break;
case MSR_IA32_PEBS_ENABLE:
- if (pmu->pebs_enable == data)
- return 0;
- if (!(data & pmu->pebs_enable_mask)) {
+ if (data & pmu->pebs_enable_mask)
+ return 1;
+
+ if (pmu->pebs_enable != data) {
diff = pmu->pebs_enable ^ data;
pmu->pebs_enable = data;
reprogram_counters(pmu, diff);
- return 0;
}
break;
case MSR_IA32_DS_AREA:
@@ -447,15 +448,14 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
if (is_noncanonical_address(data, vcpu))
return 1;
+
pmu->ds_area = data;
- return 0;
+ break;
case MSR_PEBS_DATA_CFG:
- if (pmu->pebs_data_cfg == data)
- return 0;
- if (!(data & pmu->pebs_data_cfg_mask)) {
- pmu->pebs_data_cfg = data;
- return 0;
- }
+ if (data & pmu->pebs_data_cfg_mask)
+ return 1;
+
+ pmu->pebs_data_cfg = data;
break;
default:
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
@@ -463,33 +463,38 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if ((msr & MSR_PMC_FULL_WIDTH_BIT) &&
(data & ~pmu->counter_bitmask[KVM_PMC_GP]))
return 1;
+
if (!msr_info->host_initiated &&
!(msr & MSR_PMC_FULL_WIDTH_BIT))
data = (s64)(s32)data;
pmc->counter += data - pmc_read_counter(pmc);
pmc_update_sample_period(pmc);
- return 0;
+ break;
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
pmc->counter += data - pmc_read_counter(pmc);
pmc_update_sample_period(pmc);
- return 0;
+ break;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
- if (data == pmc->eventsel)
- return 0;
reserved_bits = pmu->reserved_bits;
if ((pmc->idx == 2) &&
(pmu->raw_event_mask & HSW_IN_TX_CHECKPOINTED))
reserved_bits ^= HSW_IN_TX_CHECKPOINTED;
- if (!(data & reserved_bits)) {
+ if (data & reserved_bits)
+ return 1;
+
+ if (data != pmc->eventsel) {
pmc->eventsel = data;
- kvm_pmu_request_counter_reprogam(pmc);
- return 0;
+ kvm_pmu_request_counter_reprogram(pmc);
}
- } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false))
- return 0;
+ break;
+ } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false)) {
+ break;
+ }
+ /* Not a known PMU MSR. */
+ return 1;
}
- return 1;
+ return 0;
}
static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
@@ -531,6 +536,16 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->pebs_enable_mask = ~0ull;
pmu->pebs_data_cfg_mask = ~0ull;
+ memset(&lbr_desc->records, 0, sizeof(lbr_desc->records));
+
+ /*
+ * Setting passthrough of LBR MSRs is done only in the VM-Entry loop,
+ * and PMU refresh is disallowed after the vCPU has run, i.e. this code
+ * should never be reached while KVM is passing through MSRs.
+ */
+ if (KVM_BUG_ON(lbr_desc->msr_passthrough, vcpu->kvm))
+ return;
+
entry = kvm_find_cpuid_entry(vcpu, 0xa);
if (!entry || !vcpu->kvm->arch.enable_pmu)
return;
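[Editor's note: the restructured reprogram_counters() relies on the caller passing an XOR of the old and new control values, so only counters whose enable bit actually flipped are marked for reprogramming. A minimal standalone illustration of that diff-bitmask idea follows; the control values are arbitrary examples.]

/* Standalone illustration of the XOR-diff used to find changed counters. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t old_ctrl = 0x3ull;           /* counters 0 and 1 enabled */
	uint64_t new_ctrl = 0x6ull;           /* counters 1 and 2 enabled */
	uint64_t diff = old_ctrl ^ new_ctrl;  /* bits that changed state  */

	/* Walk the set bits; each one is a counter needing reprogramming. */
	for (int bit = 0; bit < 64; bit++) {
		if (diff & (1ull << bit))
			printf("counter %d changed state\n", bit);
	}
	return 0;
}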
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e06fcd6..44fb619 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -164,6 +164,7 @@ module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
MSR_IA32_SPEC_CTRL,
MSR_IA32_PRED_CMD,
+ MSR_IA32_FLUSH_CMD,
MSR_IA32_TSC,
#ifdef CONFIG_X86_64
MSR_FS_BASE,
@@ -1945,7 +1946,7 @@ static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx,
static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
{
switch (msr->index) {
- case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+ case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
if (!nested)
return 1;
return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
@@ -2030,7 +2031,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = to_vmx(vcpu)->msr_ia32_sgxlepubkeyhash
[msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0];
break;
- case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+ case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
if (!nested_vmx_allowed(vcpu))
return 1;
if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
@@ -2133,39 +2134,6 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated
return debugctl;
}
-static int vmx_set_msr_ia32_cmd(struct kvm_vcpu *vcpu,
- struct msr_data *msr_info,
- bool guest_has_feat, u64 cmd,
- int x86_feature_bit)
-{
- if (!msr_info->host_initiated && !guest_has_feat)
- return 1;
-
- if (!(msr_info->data & ~cmd))
- return 1;
- if (!boot_cpu_has(x86_feature_bit))
- return 1;
- if (!msr_info->data)
- return 0;
-
- wrmsrl(msr_info->index, cmd);
-
- /*
- * For non-nested:
- * When it's written (to non-zero) for the first time, pass
- * it through.
- *
- * For nested:
- * The handling of the MSR bitmap for L2 guests is done in
- * nested_vmx_prepare_msr_bitmap. We should not touch the
- * vmcs02.msr_bitmap here since it gets completely overwritten
- * in the merging.
- */
- vmx_disable_intercept_for_msr(vcpu, msr_info->index, MSR_TYPE_W);
-
- return 0;
-}
-
/*
* Writes msr value into the appropriate "register".
* Returns 0 on success, non-0 otherwise.
@@ -2318,18 +2286,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR))
return 1;
goto find_uret_msr;
- case MSR_IA32_PRED_CMD:
- ret = vmx_set_msr_ia32_cmd(vcpu, msr_info,
- guest_has_pred_cmd_msr(vcpu),
- PRED_CMD_IBPB,
- X86_FEATURE_IBPB);
- break;
- case MSR_IA32_FLUSH_CMD:
- ret = vmx_set_msr_ia32_cmd(vcpu, msr_info,
- guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D),
- L1D_FLUSH,
- X86_FEATURE_FLUSH_L1D);
- break;
case MSR_IA32_CR_PAT:
if (!kvm_pat_valid(data))
return 1;
@@ -2384,7 +2340,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmx->msr_ia32_sgxlepubkeyhash
[msr_index - MSR_IA32_SGXLEPUBKEYHASH0] = data;
break;
- case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+ case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
if (!msr_info->host_initiated)
return 1; /* they are read-only */
if (!nested_vmx_allowed(vcpu))
@@ -4790,7 +4746,7 @@ static void init_vmcs(struct vcpu_vmx *vmx)
/* 22.2.1, 20.8.1 */
vm_entry_controls_set(vmx, vmx_vmentry_ctrl());
- vmx->vcpu.arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
+ vmx->vcpu.arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits);
set_cr4_guest_host_mask(vmx);
@@ -5180,7 +5136,7 @@ bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu)
if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
return true;
- return vmx_get_cpl(vcpu) == 3 && kvm_read_cr0_bits(vcpu, X86_CR0_AM) &&
+ return vmx_get_cpl(vcpu) == 3 && kvm_is_cr0_bit_set(vcpu, X86_CR0_AM) &&
(kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
}
@@ -5517,7 +5473,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
break;
case 3: /* lmsw */
val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
- trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
+ trace_kvm_cr_write(0, (kvm_read_cr0_bits(vcpu, ~0xful) | val));
kvm_lmsw(vcpu, val);
return kvm_skip_emulated_instruction(vcpu);
@@ -6974,7 +6930,7 @@ static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index)
* real mode.
*/
return enable_unrestricted_guest || emulate_invalid_guest_state;
- case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+ case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
return nested;
case MSR_AMD64_VIRT_SPEC_CTRL:
case MSR_AMD64_TSC_RATIO:
@@ -7575,7 +7531,7 @@ static u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
- if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
+ if (kvm_read_cr0_bits(vcpu, X86_CR0_CD)) {
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
cache = MTRR_TYPE_WRBACK;
else
@@ -7761,6 +7717,13 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R,
!guest_cpuid_has(vcpu, X86_FEATURE_XFD));
+ if (boot_cpu_has(X86_FEATURE_IBPB))
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
+ !guest_has_pred_cmd_msr(vcpu));
+
+ if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
+ !guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D));
set_cr4_guest_host_mask(vmx);
@@ -7793,9 +7756,11 @@ static u64 vmx_get_perf_capabilities(void)
if (boot_cpu_has(X86_FEATURE_PDCM))
rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
- x86_perf_get_lbr(&lbr);
- if (lbr.nr)
- perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
+ if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) {
+ x86_perf_get_lbr(&lbr);
+ if (lbr.nr)
+ perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
+ }
if (vmx_pebs_supported()) {
perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK;
@@ -8447,9 +8412,8 @@ static __init int hardware_setup(void)
#if IS_ENABLED(CONFIG_HYPERV)
if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH
&& enable_ept) {
- vmx_x86_ops.tlb_remote_flush = hv_remote_flush_tlb;
- vmx_x86_ops.tlb_remote_flush_with_range =
- hv_remote_flush_tlb_with_range;
+ vmx_x86_ops.flush_remote_tlbs = hv_flush_remote_tlbs;
+ vmx_x86_ops.flush_remote_tlbs_range = hv_flush_remote_tlbs_range;
}
#endif
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 2acdc54..9e66531 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -369,7 +369,7 @@ struct vcpu_vmx {
struct lbr_desc lbr_desc;
/* Save desired MSR intercept (read: pass-through) state */
-#define MAX_POSSIBLE_PASSTHROUGH_MSRS 15
+#define MAX_POSSIBLE_PASSTHROUGH_MSRS 16
struct {
DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
@@ -640,6 +640,24 @@ BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64)
(1 << VCPU_EXREG_EXIT_INFO_1) | \
(1 << VCPU_EXREG_EXIT_INFO_2))
+static inline unsigned long vmx_l1_guest_owned_cr0_bits(void)
+{
+ unsigned long bits = KVM_POSSIBLE_CR0_GUEST_BITS;
+
+ /*
+ * CR0.WP needs to be intercepted when KVM is shadowing legacy paging
+ * in order to construct shadow PTEs with the correct protections.
+ * Note! CR0.WP technically can be passed through to the guest if
+ * paging is disabled, but checking CR0.PG would generate a cyclical
+ * dependency of sorts due to forcing the caller to ensure CR0 holds
+ * the correct value prior to determining which CR0 bits can be owned
+ * by L1. Keep it simple and limit the optimization to EPT.
+ */
+ if (!enable_ept)
+ bits &= ~X86_CR0_WP;
+ return bits;
+}
+
static __always_inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
{
return container_of(kvm, struct kvm_vmx, kvm);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 237c483..523c39a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -194,7 +194,7 @@ bool __read_mostly eager_page_split = true;
module_param(eager_page_split, bool, 0644);
/* Enable/disable SMT_RSB bug mitigation */
-bool __read_mostly mitigate_smt_rsb;
+static bool __read_mostly mitigate_smt_rsb;
module_param(mitigate_smt_rsb, bool, 0444);
/*
@@ -802,8 +802,8 @@ void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
*/
if ((fault->error_code & PFERR_PRESENT_MASK) &&
!(fault->error_code & PFERR_RSVD_MASK))
- kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address,
- fault_mmu->root.hpa);
+ kvm_mmu_invalidate_addr(vcpu, fault_mmu, fault->address,
+ KVM_MMU_ROOT_CURRENT);
fault_mmu->inject_page_fault(vcpu, fault);
}
@@ -841,7 +841,7 @@ bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
{
- if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
+ if ((dr != 4 && dr != 5) || !kvm_is_cr4_bit_set(vcpu, X86_CR4_DE))
return true;
kvm_queue_exception(vcpu, UD_VECTOR);
@@ -906,6 +906,24 @@ EXPORT_SYMBOL_GPL(load_pdptrs);
void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
{
+ /*
+ * CR0.WP is incorporated into the MMU role, but only for non-nested,
+ * indirect shadow MMUs. If paging is disabled, no updates are needed
+ * as there are no permission bits to emulate. If TDP is enabled, the
+ * MMU's metadata needs to be updated, e.g. so that emulating guest
+ * translations does the right thing, but there's no need to unload the
+ * root as CR0.WP doesn't affect SPTEs.
+ */
+ if ((cr0 ^ old_cr0) == X86_CR0_WP) {
+ if (!(cr0 & X86_CR0_PG))
+ return;
+
+ if (tdp_enabled) {
+ kvm_init_mmu(vcpu);
+ return;
+ }
+ }
+
if ((cr0 ^ old_cr0) & X86_CR0_PG) {
kvm_clear_async_pf_completion_queue(vcpu);
kvm_async_pf_hash_reset(vcpu);
@@ -965,7 +983,7 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
return 1;
if (!(cr0 & X86_CR0_PG) &&
- (is_64_bit_mode(vcpu) || kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)))
+ (is_64_bit_mode(vcpu) || kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE)))
return 1;
static_call(kvm_x86_set_cr0)(vcpu, cr0);
@@ -987,7 +1005,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
if (vcpu->arch.guest_state_protected)
return;
- if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
+ if (kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE)) {
if (vcpu->arch.xcr0 != host_xcr0)
xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
@@ -1001,7 +1019,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
if (static_cpu_has(X86_FEATURE_PKU) &&
vcpu->arch.pkru != vcpu->arch.host_pkru &&
((vcpu->arch.xcr0 & XFEATURE_MASK_PKRU) ||
- kvm_read_cr4_bits(vcpu, X86_CR4_PKE)))
+ kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE)))
write_pkru(vcpu->arch.pkru);
#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
}
@@ -1015,14 +1033,14 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
if (static_cpu_has(X86_FEATURE_PKU) &&
((vcpu->arch.xcr0 & XFEATURE_MASK_PKRU) ||
- kvm_read_cr4_bits(vcpu, X86_CR4_PKE))) {
+ kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE))) {
vcpu->arch.pkru = rdpkru();
if (vcpu->arch.pkru != vcpu->arch.host_pkru)
write_pkru(vcpu->arch.host_pkru);
}
#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
- if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
+ if (kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE)) {
if (vcpu->arch.xcr0 != host_xcr0)
xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
@@ -1178,9 +1196,6 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 1;
if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
- if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
- return 1;
-
/* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
return 1;
@@ -1227,7 +1242,7 @@ static void kvm_invalidate_pcid(struct kvm_vcpu *vcpu, unsigned long pcid)
* PCIDs for them are also 0, because MOV to CR3 always flushes the TLB
* with PCIDE=0.
*/
- if (!kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
+ if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE))
return;
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
@@ -1242,9 +1257,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
bool skip_tlb_flush = false;
unsigned long pcid = 0;
#ifdef CONFIG_X86_64
- bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
-
- if (pcid_enabled) {
+ if (kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE)) {
skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
cr3 &= ~X86_CR3_PCID_NOFLUSH;
pcid = cr3 & X86_CR3_PCID_MASK;
@@ -1543,39 +1556,41 @@ static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
static unsigned num_emulated_msrs;
/*
- * List of msr numbers which are used to expose MSR-based features that
- * can be used by a hypervisor to validate requested CPU features.
+ * List of MSRs that control the existence of MSR-based features, i.e. MSRs
+ * that are effectively CPUID leafs. VMX MSRs are also included in the set of
+ * feature MSRs, but are handled separately to allow expedited lookups.
*/
-static const u32 msr_based_features_all[] = {
- MSR_IA32_VMX_BASIC,
- MSR_IA32_VMX_TRUE_PINBASED_CTLS,
- MSR_IA32_VMX_PINBASED_CTLS,
- MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
- MSR_IA32_VMX_PROCBASED_CTLS,
- MSR_IA32_VMX_TRUE_EXIT_CTLS,
- MSR_IA32_VMX_EXIT_CTLS,
- MSR_IA32_VMX_TRUE_ENTRY_CTLS,
- MSR_IA32_VMX_ENTRY_CTLS,
- MSR_IA32_VMX_MISC,
- MSR_IA32_VMX_CR0_FIXED0,
- MSR_IA32_VMX_CR0_FIXED1,
- MSR_IA32_VMX_CR4_FIXED0,
- MSR_IA32_VMX_CR4_FIXED1,
- MSR_IA32_VMX_VMCS_ENUM,
- MSR_IA32_VMX_PROCBASED_CTLS2,
- MSR_IA32_VMX_EPT_VPID_CAP,
- MSR_IA32_VMX_VMFUNC,
-
+static const u32 msr_based_features_all_except_vmx[] = {
MSR_AMD64_DE_CFG,
MSR_IA32_UCODE_REV,
MSR_IA32_ARCH_CAPABILITIES,
MSR_IA32_PERF_CAPABILITIES,
};
-static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
+static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all_except_vmx) +
+ (KVM_LAST_EMULATED_VMX_MSR - KVM_FIRST_EMULATED_VMX_MSR + 1)];
static unsigned int num_msr_based_features;
/*
+ * All feature MSRs except uCode revID, which tracks the currently loaded uCode
+ * patch, are immutable once the vCPU model is defined.
+ */
+static bool kvm_is_immutable_feature_msr(u32 msr)
+{
+ int i;
+
+ if (msr >= KVM_FIRST_EMULATED_VMX_MSR && msr <= KVM_LAST_EMULATED_VMX_MSR)
+ return true;
+
+ for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++) {
+ if (msr == msr_based_features_all_except_vmx[i])
+ return msr != MSR_IA32_UCODE_REV;
+ }
+
+ return false;
+}
+
+/*
* Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM
* does not yet virtualize. These include:
* 10 - MISC_PACKAGE_CTRLS
@@ -2192,6 +2207,22 @@ static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
+ u64 val;
+
+ /*
+ * Disallow writes to immutable feature MSRs after KVM_RUN. KVM does
+ * not support modifying the guest vCPU model on the fly, e.g. changing
+ * the nVMX capabilities while L2 is running is nonsensical. Ignore
+ * writes of the same value, e.g. to allow userspace to blindly stuff
+ * all MSRs when emulating RESET.
+ */
+ if (kvm_vcpu_has_run(vcpu) && kvm_is_immutable_feature_msr(index)) {
+ if (do_get_msr(vcpu, index, &val) || *data != val)
+ return -EINVAL;
+
+ return 0;
+ }
+
return kvm_set_msr_ignored_check(vcpu, index, *data, true);
}
@@ -3614,9 +3645,40 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (data & ~kvm_caps.supported_perf_cap)
return 1;
+ /*
+ * Note, this is not just a performance optimization! KVM
+ * disallows changing feature MSRs after the vCPU has run; PMU
+ * refresh will bug the VM if called after the vCPU has run.
+ */
+ if (vcpu->arch.perf_capabilities == data)
+ break;
+
vcpu->arch.perf_capabilities = data;
kvm_pmu_refresh(vcpu);
- return 0;
+ break;
+ case MSR_IA32_PRED_CMD:
+ if (!msr_info->host_initiated && !guest_has_pred_cmd_msr(vcpu))
+ return 1;
+
+ if (!boot_cpu_has(X86_FEATURE_IBPB) || (data & ~PRED_CMD_IBPB))
+ return 1;
+ if (!data)
+ break;
+
+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
+ break;
+ case MSR_IA32_FLUSH_CMD:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D))
+ return 1;
+
+ if (!boot_cpu_has(X86_FEATURE_FLUSH_L1D) || (data & ~L1D_FLUSH))
+ return 1;
+ if (!data)
+ break;
+
+ wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
+ break;
case MSR_EFER:
return set_efer(vcpu, msr_info);
case MSR_K7_HWCR:
@@ -4531,9 +4593,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = 0;
break;
case KVM_CAP_XSAVE2: {
- u64 guest_perm = xstate_get_guest_group_perm();
-
- r = xstate_required_size(kvm_caps.supported_xcr0 & guest_perm, false);
+ r = xstate_required_size(kvm_get_filtered_xcr0(), false);
if (r < sizeof(struct kvm_xsave))
r = sizeof(struct kvm_xsave);
break;
@@ -5033,7 +5093,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
return 0;
if (mce->status & MCI_STATUS_UC) {
if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
- !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
+ !kvm_is_cr4_bit_set(vcpu, X86_CR4_MCE)) {
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
return 0;
}
@@ -5125,7 +5185,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
events->interrupt.shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
events->nmi.injected = vcpu->arch.nmi_injected;
- events->nmi.pending = vcpu->arch.nmi_pending != 0;
+ events->nmi.pending = kvm_get_nr_pending_nmis(vcpu);
events->nmi.masked = static_call(kvm_x86_get_nmi_mask)(vcpu);
/* events->sipi_vector is never valid when reporting to user space */
@@ -5212,8 +5272,11 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
events->interrupt.shadow);
vcpu->arch.nmi_injected = events->nmi.injected;
- if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
- vcpu->arch.nmi_pending = events->nmi.pending;
+ if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) {
+ vcpu->arch.nmi_pending = 0;
+ atomic_set(&vcpu->arch.nmi_queued, events->nmi.pending);
+ kvm_make_request(KVM_REQ_NMI, vcpu);
+ }
static_call(kvm_x86_set_nmi_mask)(vcpu, events->nmi.masked);
if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
@@ -7009,6 +7072,18 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
return r;
}
+static void kvm_probe_feature_msr(u32 msr_index)
+{
+ struct kvm_msr_entry msr = {
+ .index = msr_index,
+ };
+
+ if (kvm_get_msr_feature(&msr))
+ return;
+
+ msr_based_features[num_msr_based_features++] = msr_index;
+}
+
static void kvm_probe_msr_to_save(u32 msr_index)
{
u32 dummy[2];
@@ -7084,7 +7159,7 @@ static void kvm_probe_msr_to_save(u32 msr_index)
msrs_to_save[num_msrs_to_save++] = msr_index;
}
-static void kvm_init_msr_list(void)
+static void kvm_init_msr_lists(void)
{
unsigned i;
@@ -7110,15 +7185,11 @@ static void kvm_init_msr_list(void)
emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
}
- for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
- struct kvm_msr_entry msr;
+ for (i = KVM_FIRST_EMULATED_VMX_MSR; i <= KVM_LAST_EMULATED_VMX_MSR; i++)
+ kvm_probe_feature_msr(i);
- msr.index = msr_based_features_all[i];
- if (kvm_get_msr_feature(&msr))
- continue;
-
- msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
- }
+ for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++)
+ kvm_probe_feature_msr(msr_based_features_all_except_vmx[i]);
}
static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
@@ -9452,7 +9523,7 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
kvm_caps.max_guest_tsc_khz = max;
}
kvm_caps.default_tsc_scaling_ratio = 1ULL << kvm_caps.tsc_scaling_ratio_frac_bits;
- kvm_init_msr_list();
+ kvm_init_msr_lists();
return 0;
out_unwind_ops:
@@ -9783,7 +9854,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
vcpu->run->hypercall.args[0] = gpa;
vcpu->run->hypercall.args[1] = npages;
vcpu->run->hypercall.args[2] = attrs;
- vcpu->run->hypercall.longmode = op_64_bit;
+ vcpu->run->hypercall.flags = 0;
+ if (op_64_bit)
+ vcpu->run->hypercall.flags |= KVM_EXIT_HYPERCALL_LONG_MODE;
+
+ WARN_ON_ONCE(vcpu->run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ);
vcpu->arch.complete_userspace_io = complete_hypercall_exit;
return 0;
}
@@ -10138,19 +10213,46 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
static void process_nmi(struct kvm_vcpu *vcpu)
{
- unsigned limit = 2;
+ unsigned int limit;
/*
- * x86 is limited to one NMI running, and one NMI pending after it.
- * If an NMI is already in progress, limit further NMIs to just one.
- * Otherwise, allow two (and we'll inject the first one immediately).
+ * x86 is limited to one NMI pending, but because KVM can't react to
+ * incoming NMIs as quickly as bare metal, e.g. if the vCPU is
+ * scheduled out, KVM needs to play nice with two queued NMIs showing
+ * up at the same time. To handle this scenario, allow two NMIs to be
+ * (temporarily) pending so long as NMIs are not blocked and KVM is not
+ * waiting for a previous NMI injection to complete (which effectively
+ * blocks NMIs). KVM will immediately inject one of the two NMIs, and
+ * will request an NMI window to handle the second NMI.
*/
if (static_call(kvm_x86_get_nmi_mask)(vcpu) || vcpu->arch.nmi_injected)
limit = 1;
+ else
+ limit = 2;
+
+ /*
+ * Adjust the limit to account for pending virtual NMIs, which aren't
+ * tracked in vcpu->arch.nmi_pending.
+ */
+ if (static_call(kvm_x86_is_vnmi_pending)(vcpu))
+ limit--;
vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+
+ if (vcpu->arch.nmi_pending &&
+ (static_call(kvm_x86_set_vnmi_pending)(vcpu)))
+ vcpu->arch.nmi_pending--;
+
+ if (vcpu->arch.nmi_pending)
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+}
+
+/* Return total number of NMIs pending injection to the VM */
+int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.nmi_pending +
+ static_call(kvm_x86_is_vnmi_pending)(vcpu);
}
void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
@@ -13236,7 +13338,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
return 1;
}
- pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
+ pcid_enabled = kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE);
switch (type) {
case INVPCID_TYPE_INDIV_ADDR:
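[Editor's note: the process_nmi() change above adjusts the pending-NMI limit when a vNMI is already parked in hardware. A minimal standalone model of that accounting follows; the structure and flags are simplified stand-ins, not KVM's real state.]

/* Standalone model of the pending-NMI limit bookkeeping (illustrative only). */
#include <stdbool.h>
#include <stdio.h>

struct toy_vcpu {
	bool nmi_masked;     /* NMIs blocked (in-progress NMI or explicit mask) */
	bool nmi_injected;   /* an injection is already queued for this entry   */
	bool vnmi_pending;   /* hardware vNMI slot already holds one NMI        */
	unsigned nmi_pending;
	unsigned nmi_queued; /* NMIs that arrived since the last processing     */
};

static void process_nmi(struct toy_vcpu *v)
{
	unsigned limit = (v->nmi_masked || v->nmi_injected) ? 1 : 2;

	/* An NMI parked in the hardware vNMI slot consumes one of the slots. */
	if (v->vnmi_pending)
		limit--;

	v->nmi_pending += v->nmi_queued;
	v->nmi_queued = 0;
	if (v->nmi_pending > limit)
		v->nmi_pending = limit;
}

int main(void)
{
	struct toy_vcpu v = { .vnmi_pending = true, .nmi_queued = 3 };

	process_nmi(&v);
	printf("software-pending NMIs after processing: %u\n", v.nmi_pending);
	return 0;
}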
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index a8167b4..c544602 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -3,6 +3,7 @@
#define ARCH_X86_KVM_X86_H
#include <linux/kvm_host.h>
+#include <asm/fpu/xstate.h>
#include <asm/mce.h>
#include <asm/pvclock.h>
#include "kvm_cache_regs.h"
@@ -40,6 +41,14 @@ void kvm_spurious_fault(void);
failed; \
})
+/*
+ * The first...last VMX feature MSRs that are emulated by KVM. This may or may
+ * not cover all known VMX MSRs, as KVM doesn't emulate an MSR until there's an
+ * associated feature that KVM supports for nested virtualization.
+ */
+#define KVM_FIRST_EMULATED_VMX_MSR MSR_IA32_VMX_BASIC
+#define KVM_LAST_EMULATED_VMX_MSR MSR_IA32_VMX_VMFUNC
+
#define KVM_DEFAULT_PLE_GAP 128
#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
#define KVM_DEFAULT_PLE_WINDOW_GROW 2
@@ -83,6 +92,11 @@ static inline unsigned int __shrink_ple_window(unsigned int val,
void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu);
int kvm_check_nested_events(struct kvm_vcpu *vcpu);
+static inline bool kvm_vcpu_has_run(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.last_vmentry_cpu != -1;
+}
+
static inline bool kvm_is_exception_pending(struct kvm_vcpu *vcpu)
{
return vcpu->arch.exception.pending ||
@@ -123,15 +137,15 @@ static inline bool kvm_exception_is_soft(unsigned int nr)
static inline bool is_protmode(struct kvm_vcpu *vcpu)
{
- return kvm_read_cr0_bits(vcpu, X86_CR0_PE);
+ return kvm_is_cr0_bit_set(vcpu, X86_CR0_PE);
}
-static inline int is_long_mode(struct kvm_vcpu *vcpu)
+static inline bool is_long_mode(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
- return vcpu->arch.efer & EFER_LMA;
+ return !!(vcpu->arch.efer & EFER_LMA);
#else
- return 0;
+ return false;
#endif
}
@@ -171,19 +185,19 @@ static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
}
-static inline int is_pae(struct kvm_vcpu *vcpu)
+static inline bool is_pae(struct kvm_vcpu *vcpu)
{
- return kvm_read_cr4_bits(vcpu, X86_CR4_PAE);
+ return kvm_is_cr4_bit_set(vcpu, X86_CR4_PAE);
}
-static inline int is_pse(struct kvm_vcpu *vcpu)
+static inline bool is_pse(struct kvm_vcpu *vcpu)
{
- return kvm_read_cr4_bits(vcpu, X86_CR4_PSE);
+ return kvm_is_cr4_bit_set(vcpu, X86_CR4_PSE);
}
-static inline int is_paging(struct kvm_vcpu *vcpu)
+static inline bool is_paging(struct kvm_vcpu *vcpu)
{
- return likely(kvm_read_cr0_bits(vcpu, X86_CR0_PG));
+ return likely(kvm_is_cr0_bit_set(vcpu, X86_CR0_PG));
}
static inline bool is_pae_paging(struct kvm_vcpu *vcpu)
@@ -193,7 +207,7 @@ static inline bool is_pae_paging(struct kvm_vcpu *vcpu)
static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu)
{
- return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48;
+ return kvm_is_cr4_bit_set(vcpu, X86_CR4_LA57) ? 57 : 48;
}
static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu)
@@ -315,6 +329,34 @@ extern struct kvm_caps kvm_caps;
extern bool enable_pmu;
+/*
+ * Get a filtered version of KVM's supported XCR0 that strips out dynamic
+ * features for which the current process doesn't (yet) have permission to use.
+ * This is intended to be used only when enumerating support to userspace,
+ * e.g. in KVM_GET_SUPPORTED_CPUID and KVM_CAP_XSAVE2; it does NOT need to be

+ * used to check/restrict guest behavior as KVM rejects KVM_SET_CPUID{2} if
+ * userspace attempts to enable unpermitted features.
+ */
+static inline u64 kvm_get_filtered_xcr0(void)
+{
+ u64 permitted_xcr0 = kvm_caps.supported_xcr0;
+
+ BUILD_BUG_ON(XFEATURE_MASK_USER_DYNAMIC != XFEATURE_MASK_XTILE_DATA);
+
+ if (permitted_xcr0 & XFEATURE_MASK_USER_DYNAMIC) {
+ permitted_xcr0 &= xstate_get_guest_group_perm();
+
+ /*
+ * Treat XTILE_CFG as unsupported if the current process isn't
+ * allowed to use XTILE_DATA, as attempting to set XTILE_CFG in
+ * XCR0 without setting XTILE_DATA is architecturally illegal.
+ */
+ if (!(permitted_xcr0 & XFEATURE_MASK_XTILE_DATA))
+ permitted_xcr0 &= ~XFEATURE_MASK_XTILE_CFG;
+ }
+ return permitted_xcr0;
+}
+
static inline bool kvm_mpx_supported(void)
{
return (kvm_caps.supported_xcr0 & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR))
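[Editor's note: the filtering rule documented for kvm_get_filtered_xcr0() can be shown in isolation. The sketch below assumes the standard XTILE bit positions and an example permission mask; it is not the kernel's implementation.]

/* Sketch of the dynamic-XCR0 filtering rule (illustrative only). */
#include <stdint.h>
#include <stdio.h>

#define XFEATURE_MASK_XTILE_CFG  (1ull << 17)  /* AMX tile config */
#define XFEATURE_MASK_XTILE_DATA (1ull << 18)  /* AMX tile data   */

static uint64_t filter_xcr0(uint64_t supported, uint64_t process_perm)
{
	uint64_t xcr0 = supported;

	if (xcr0 & XFEATURE_MASK_XTILE_DATA) {
		/* Strip features the current process hasn't been granted. */
		xcr0 &= process_perm;

		/* XTILE_CFG without XTILE_DATA is not a legal XCR0 value. */
		if (!(xcr0 & XFEATURE_MASK_XTILE_DATA))
			xcr0 &= ~XFEATURE_MASK_XTILE_CFG;
	}
	return xcr0;
}

int main(void)
{
	uint64_t supported = 0x7 | XFEATURE_MASK_XTILE_CFG | XFEATURE_MASK_XTILE_DATA;
	uint64_t perm = 0x7 | XFEATURE_MASK_XTILE_CFG;  /* XTILE_DATA not granted */

	printf("filtered xcr0 = %#llx\n",
	       (unsigned long long)filter_xcr0(supported, perm));
	return 0;
}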
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 7316a82..e91500a 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -10,6 +10,7 @@
#include <asm/fixmap.h>
#include <asm/desc.h>
#include <asm/kasan.h>
+#include <asm/setup.h>
static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
@@ -29,6 +30,12 @@ static __init void init_cea_offsets(void)
unsigned int max_cea;
unsigned int i, j;
+ if (!kaslr_enabled()) {
+ for_each_possible_cpu(i)
+ per_cpu(_cea_offset, i) = i;
+ return;
+ }
+
max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE;
/* O(sodding terrible) */
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index 88cccd6..c6efcf5 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -600,7 +600,8 @@ void __init sme_enable(struct boot_params *bp)
cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
((u64)bp->ext_cmd_line_ptr << 32));
- cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));
+ if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0)
+ return;
if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
sme_me_mask = me_mask;
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 3c5b52f..a9ec8c9 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -45,6 +45,6 @@
obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
-obj-$(CONFIG_XEN_PV_DOM0) += vga.o
+obj-$(CONFIG_XEN_DOM0) += vga.o
obj-$(CONFIG_XEN_EFI) += efi.o
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index bb59cc6..093b78c 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1390,7 +1390,8 @@ asmlinkage __visible void __init xen_start_kernel(struct start_info *si)
x86_platform.set_legacy_features =
xen_dom0_set_legacy_features;
- xen_init_vga(info, xen_start_info->console.dom0.info_size);
+ xen_init_vga(info, xen_start_info->console.dom0.info_size,
+ &boot_params.screen_info);
xen_start_info->console.domU.mfn = 0;
xen_start_info->console.domU.evtchn = 0;
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index bcae606..ada3868 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -43,6 +43,19 @@ void __init xen_pvh_init(struct boot_params *boot_params)
x86_init.oem.banner = xen_banner;
xen_efi_init(boot_params);
+
+ if (xen_initial_domain()) {
+ struct xen_platform_op op = {
+ .cmd = XENPF_get_dom0_console,
+ };
+ int ret = HYPERVISOR_platform_op(&op);
+
+ if (ret > 0)
+ xen_init_vga(&op.u.dom0_console,
+ min(ret * sizeof(char),
+ sizeof(op.u.dom0_console)),
+ &boot_params->screen_info);
+ }
}
void __init mem_map_via_hcall(struct boot_params *boot_params_p)
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 1d59736..b74ac25 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -20,6 +20,7 @@
#include <asm/pvclock.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
+#include <asm/xen/cpuid.h>
#include <xen/events.h>
#include <xen/features.h>
@@ -503,11 +504,7 @@ static int __init xen_tsc_safe_clocksource(void)
/* Leaf 4, sub-leaf 0 (0x40000x03) */
cpuid_count(xen_cpuid_base() + 3, 0, &eax, &ebx, &ecx, &edx);
- /* tsc_mode = no_emulate (2) */
- if (ebx != 2)
- return 0;
-
- return 1;
+ return ebx == XEN_CPUID_TSC_MODE_NEVER_EMULATE;
}
static void __init xen_time_init(void)
diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c
index 14ea32e..d97adab 100644
--- a/arch/x86/xen/vga.c
+++ b/arch/x86/xen/vga.c
@@ -9,10 +9,9 @@
#include "xen-ops.h"
-void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
+void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size,
+ struct screen_info *screen_info)
{
- struct screen_info *screen_info = &boot_params.screen_info;
-
/* This is drawn from a dump from vgacon:startup in
* standard Linux. */
screen_info->orig_video_mode = 3;
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9a8bb97..a109037 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -108,11 +108,12 @@ static inline void xen_uninit_lock_cpu(int cpu)
struct dom0_vga_console_info;
-#ifdef CONFIG_XEN_PV_DOM0
-void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size);
+#ifdef CONFIG_XEN_DOM0
+void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size,
+ struct screen_info *);
#else
static inline void __init xen_init_vga(const struct dom0_vga_console_info *info,
- size_t size)
+ size_t size, struct screen_info *si)
{
}
#endif
diff --git a/block/Kconfig b/block/Kconfig
index 5d9d9c8..941b2dc 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -204,9 +204,6 @@
source "block/partitions/Kconfig"
-config BLOCK_COMPAT
- def_bool COMPAT
-
config BLK_MQ_PCI
def_bool PCI
diff --git a/block/blk-core.c b/block/blk-core.c
index 9e5e027..42926e6 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -959,16 +959,11 @@ void update_io_ticks(struct block_device *part, unsigned long now, bool end)
}
}
-unsigned long bdev_start_io_acct(struct block_device *bdev,
- unsigned int sectors, enum req_op op,
+unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
unsigned long start_time)
{
- const int sgrp = op_stat_group(op);
-
part_stat_lock();
update_io_ticks(bdev, start_time, false);
- part_stat_inc(bdev, ios[sgrp]);
- part_stat_add(bdev, sectors[sgrp], sectors);
part_stat_local_inc(bdev, in_flight[op_is_write(op)]);
part_stat_unlock();
@@ -984,13 +979,12 @@ EXPORT_SYMBOL(bdev_start_io_acct);
*/
unsigned long bio_start_io_acct(struct bio *bio)
{
- return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
- bio_op(bio), jiffies);
+ return bdev_start_io_acct(bio->bi_bdev, bio_op(bio), jiffies);
}
EXPORT_SYMBOL_GPL(bio_start_io_acct);
void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
- unsigned long start_time)
+ unsigned int sectors, unsigned long start_time)
{
const int sgrp = op_stat_group(op);
unsigned long now = READ_ONCE(jiffies);
@@ -998,6 +992,8 @@ void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
part_stat_lock();
update_io_ticks(bdev, now, true);
+ part_stat_inc(bdev, ios[sgrp]);
+ part_stat_add(bdev, sectors[sgrp], sectors);
part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration));
part_stat_local_dec(bdev, in_flight[op_is_write(op)]);
part_stat_unlock();
@@ -1007,7 +1003,7 @@ EXPORT_SYMBOL(bdev_end_io_acct);
void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
struct block_device *orig_bdev)
{
- bdev_end_io_acct(orig_bdev, bio_op(bio), start_time);
+ bdev_end_io_acct(orig_bdev, bio_op(bio), bio_sectors(bio), start_time);
}
EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped);
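[Editor's note: the blk-core change above moves the ios/sectors counters from submission time to completion time, when the actual transferred size is known. A toy model of that accounting split, not the kernel API, is sketched below.]

/* Toy model: account ios/sectors at completion, not submission (illustrative). */
#include <stdio.h>

struct toy_stats {
	unsigned long ios, sectors, in_flight;
};

static void start_acct(struct toy_stats *s)
{
	/* Only in-flight tracking happens at submission now. */
	s->in_flight++;
}

static void end_acct(struct toy_stats *s, unsigned int sectors)
{
	/* ios/sectors are counted once the actual transfer size is known. */
	s->ios++;
	s->sectors += sectors;
	s->in_flight--;
}

int main(void)
{
	struct toy_stats s = { 0 };

	start_acct(&s);
	end_acct(&s, 8);   /* e.g. a 4KiB request = 8 x 512-byte sectors */
	printf("ios=%lu sectors=%lu in_flight=%lu\n", s.ios, s.sectors, s.in_flight);
	return 0;
}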
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d0cb2ef..cf1a39a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2725,6 +2725,7 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
struct blk_mq_hw_ctx *this_hctx = NULL;
struct blk_mq_ctx *this_ctx = NULL;
struct request *requeue_list = NULL;
+ struct request **requeue_lastp = &requeue_list;
unsigned int depth = 0;
LIST_HEAD(list);
@@ -2735,10 +2736,10 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
this_hctx = rq->mq_hctx;
this_ctx = rq->mq_ctx;
} else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx) {
- rq_list_add(&requeue_list, rq);
+ rq_list_add_tail(&requeue_lastp, rq);
continue;
}
- list_add_tail(&rq->queuelist, &list);
+ list_add(&rq->queuelist, &list);
depth++;
} while (!rq_list_empty(plug->mq_list));
diff --git a/block/blk-mq.h b/block/blk-mq.h
index ef59fee..a7482d2c 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -378,12 +378,13 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
#define __blk_mq_run_dispatch_ops(q, check_sleep, dispatch_ops) \
do { \
if ((q)->tag_set->flags & BLK_MQ_F_BLOCKING) { \
+ struct blk_mq_tag_set *__tag_set = (q)->tag_set; \
int srcu_idx; \
\
might_sleep_if(check_sleep); \
- srcu_idx = srcu_read_lock((q)->tag_set->srcu); \
+ srcu_idx = srcu_read_lock(__tag_set->srcu); \
(dispatch_ops); \
- srcu_read_unlock((q)->tag_set->srcu, srcu_idx); \
+ srcu_read_unlock(__tag_set->srcu, srcu_idx); \
} else { \
rcu_read_lock(); \
(dispatch_ops); \
diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c
index 4fa769c..f0d4ff3 100644
--- a/crypto/asymmetric_keys/pkcs7_verify.c
+++ b/crypto/asymmetric_keys/pkcs7_verify.c
@@ -79,16 +79,16 @@ static int pkcs7_digest(struct pkcs7_message *pkcs7,
}
if (sinfo->msgdigest_len != sig->digest_size) {
- pr_debug("Sig %u: Invalid digest size (%u)\n",
- sinfo->index, sinfo->msgdigest_len);
+ pr_warn("Sig %u: Invalid digest size (%u)\n",
+ sinfo->index, sinfo->msgdigest_len);
ret = -EBADMSG;
goto error;
}
if (memcmp(sig->digest, sinfo->msgdigest,
sinfo->msgdigest_len) != 0) {
- pr_debug("Sig %u: Message digest doesn't match\n",
- sinfo->index);
+ pr_warn("Sig %u: Message digest doesn't match\n",
+ sinfo->index);
ret = -EKEYREJECTED;
goto error;
}
@@ -478,7 +478,7 @@ int pkcs7_supply_detached_data(struct pkcs7_message *pkcs7,
const void *data, size_t datalen)
{
if (pkcs7->data) {
- pr_debug("Data already supplied\n");
+ pr_warn("Data already supplied\n");
return -EINVAL;
}
pkcs7->data = data;
diff --git a/crypto/asymmetric_keys/verify_pefile.c b/crypto/asymmetric_keys/verify_pefile.c
index 7553ab1..22beaf2 100644
--- a/crypto/asymmetric_keys/verify_pefile.c
+++ b/crypto/asymmetric_keys/verify_pefile.c
@@ -74,7 +74,7 @@ static int pefile_parse_binary(const void *pebuf, unsigned int pelen,
break;
default:
- pr_debug("Unknown PEOPT magic = %04hx\n", pe32->magic);
+ pr_warn("Unknown PEOPT magic = %04hx\n", pe32->magic);
return -ELIBBAD;
}
@@ -95,7 +95,7 @@ static int pefile_parse_binary(const void *pebuf, unsigned int pelen,
ctx->certs_size = ddir->certs.size;
if (!ddir->certs.virtual_address || !ddir->certs.size) {
- pr_debug("Unsigned PE binary\n");
+ pr_warn("Unsigned PE binary\n");
return -ENODATA;
}
@@ -127,7 +127,7 @@ static int pefile_strip_sig_wrapper(const void *pebuf,
unsigned len;
if (ctx->sig_len < sizeof(wrapper)) {
- pr_debug("Signature wrapper too short\n");
+ pr_warn("Signature wrapper too short\n");
return -ELIBBAD;
}
@@ -135,19 +135,23 @@ static int pefile_strip_sig_wrapper(const void *pebuf,
pr_debug("sig wrapper = { %x, %x, %x }\n",
wrapper.length, wrapper.revision, wrapper.cert_type);
- /* Both pesign and sbsign round up the length of certificate table
- * (in optional header data directories) to 8 byte alignment.
+ /* sbsign rounds up the length of the certificate table (in optional
+ * header data directories) to 8-byte alignment. However, the PE
+ * specification states that while entries are 8-byte aligned, this is
+ * not included in their length, and as a result, pesign has not
+ * rounded up since 0.110.
*/
- if (round_up(wrapper.length, 8) != ctx->sig_len) {
- pr_debug("Signature wrapper len wrong\n");
+ if (wrapper.length > ctx->sig_len) {
+ pr_warn("Signature wrapper bigger than sig len (%x > %x)\n",
+ wrapper.length, ctx->sig_len);
return -ELIBBAD;
}
if (wrapper.revision != WIN_CERT_REVISION_2_0) {
- pr_debug("Signature is not revision 2.0\n");
+ pr_warn("Signature is not revision 2.0\n");
return -ENOTSUPP;
}
if (wrapper.cert_type != WIN_CERT_TYPE_PKCS_SIGNED_DATA) {
- pr_debug("Signature certificate type is not PKCS\n");
+ pr_warn("Signature certificate type is not PKCS\n");
return -ENOTSUPP;
}
@@ -160,7 +164,7 @@ static int pefile_strip_sig_wrapper(const void *pebuf,
ctx->sig_offset += sizeof(wrapper);
ctx->sig_len -= sizeof(wrapper);
if (ctx->sig_len < 4) {
- pr_debug("Signature data missing\n");
+ pr_warn("Signature data missing\n");
return -EKEYREJECTED;
}
@@ -194,7 +198,7 @@ static int pefile_strip_sig_wrapper(const void *pebuf,
return 0;
}
not_pkcs7:
- pr_debug("Signature data not PKCS#7\n");
+ pr_warn("Signature data not PKCS#7\n");
return -ELIBBAD;
}
@@ -337,8 +341,8 @@ static int pefile_digest_pe(const void *pebuf, unsigned int pelen,
digest_size = crypto_shash_digestsize(tfm);
if (digest_size != ctx->digest_len) {
- pr_debug("Digest size mismatch (%zx != %x)\n",
- digest_size, ctx->digest_len);
+ pr_warn("Digest size mismatch (%zx != %x)\n",
+ digest_size, ctx->digest_len);
ret = -EBADMSG;
goto error_no_desc;
}
@@ -369,7 +373,7 @@ static int pefile_digest_pe(const void *pebuf, unsigned int pelen,
* PKCS#7 certificate.
*/
if (memcmp(digest, ctx->digest, ctx->digest_len) != 0) {
- pr_debug("Digest mismatch\n");
+ pr_warn("Digest mismatch\n");
ret = -EKEYREJECTED;
} else {
pr_debug("The digests match!\n");
diff --git a/drivers/accel/Makefile b/drivers/accel/Makefile
index 07aa77a..f22fd44 100644
--- a/drivers/accel/Makefile
+++ b/drivers/accel/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-y += habanalabs/
-obj-y += ivpu/
+obj-$(CONFIG_DRM_ACCEL_HABANALABS) += habanalabs/
+obj-$(CONFIG_DRM_ACCEL_IVPU) += ivpu/
diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
index 10975bb..a35dd0e 100644
--- a/drivers/acpi/pptt.c
+++ b/drivers/acpi/pptt.c
@@ -536,16 +536,19 @@ static int topology_get_acpi_cpu_tag(struct acpi_table_header *table,
static struct acpi_table_header *acpi_get_pptt(void)
{
static struct acpi_table_header *pptt;
+ static bool is_pptt_checked;
acpi_status status;
/*
* PPTT will be used at runtime on every CPU hotplug in path, so we
* don't need to call acpi_put_table() to release the table mapping.
*/
- if (!pptt) {
+ if (!pptt && !is_pptt_checked) {
status = acpi_get_table(ACPI_SIG_PPTT, 0, &pptt);
if (ACPI_FAILURE(status))
acpi_pptt_warn_missing();
+
+ is_pptt_checked = true;
}
return pptt;
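The acpi_get_pptt() change above is an instance of a general "look up once, remember the failure" idiom; a stripped-down sketch with hypothetical names:

/* Illustrative only: cache both the pointer and the fact that the
 * lookup already ran, so a missing table is not re-queried on every
 * CPU hotplug.
 */
static struct acpi_table_header *get_pptt_cached(void)
{
	static struct acpi_table_header *table;
	static bool checked;

	if (!table && !checked) {
		if (ACPI_FAILURE(acpi_get_table(ACPI_SIG_PPTT, 0, &table)))
			table = NULL;		/* stays NULL, never retried */
		checked = true;
	}
	return table;
}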
diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c
index 1278969..4bd16b3 100644
--- a/drivers/acpi/processor_driver.c
+++ b/drivers/acpi/processor_driver.c
@@ -263,6 +263,12 @@ static int __init acpi_processor_driver_init(void)
if (acpi_disabled)
return 0;
+ if (!cpufreq_register_notifier(&acpi_processor_notifier_block,
+ CPUFREQ_POLICY_NOTIFIER)) {
+ acpi_processor_cpufreq_init = true;
+ acpi_processor_ignore_ppc_init();
+ }
+
result = driver_register(&acpi_processor_driver);
if (result < 0)
return result;
@@ -276,12 +282,6 @@ static int __init acpi_processor_driver_init(void)
cpuhp_setup_state_nocalls(CPUHP_ACPI_CPUDRV_DEAD, "acpi/cpu-drv:dead",
NULL, acpi_soft_cpu_dead);
- if (!cpufreq_register_notifier(&acpi_processor_notifier_block,
- CPUFREQ_POLICY_NOTIFIER)) {
- acpi_processor_cpufreq_init = true;
- acpi_processor_ignore_ppc_init();
- }
-
acpi_processor_throttling_init();
return 0;
err:
diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c
index e534fd49a..b7c6287 100644
--- a/drivers/acpi/processor_thermal.c
+++ b/drivers/acpi/processor_thermal.c
@@ -140,9 +140,13 @@ void acpi_thermal_cpufreq_init(struct cpufreq_policy *policy)
ret = freq_qos_add_request(&policy->constraints,
&pr->thermal_req,
FREQ_QOS_MAX, INT_MAX);
- if (ret < 0)
+ if (ret < 0) {
pr_err("Failed to add freq constraint for CPU%d (%d)\n",
cpu, ret);
+ continue;
+ }
+
+ thermal_cooling_device_update(pr->cdev);
}
}
@@ -153,8 +157,12 @@ void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy)
for_each_cpu(cpu, policy->related_cpus) {
struct acpi_processor *pr = per_cpu(processors, cpu);
- if (pr)
- freq_qos_remove_request(&pr->thermal_req);
+ if (!pr)
+ continue;
+
+ freq_qos_remove_request(&pr->thermal_req);
+
+ thermal_cooling_device_update(pr->cdev);
}
}
#else /* ! CONFIG_CPU_FREQ */
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index 7c9125d..7b4801c 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -400,6 +400,13 @@ static const struct dmi_system_id medion_laptop[] = {
DMI_MATCH(DMI_BOARD_NAME, "M17T"),
},
},
+ {
+ .ident = "MEDION S17413",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "MEDION"),
+ DMI_MATCH(DMI_BOARD_NAME, "M1xA"),
+ },
+ },
{ }
};
diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index 710ac64..fd7cbce 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -497,6 +497,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
},
{
.callback = video_detect_force_native,
+ /* Acer Aspire 3830TG */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 3830TG"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
/* Acer Aspire 4810T */
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
@@ -716,6 +724,13 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "Dell G15 5515"),
},
},
+ {
+ .callback = video_detect_force_native,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 15 3535"),
+ },
+ },
/*
* Desktops which falsely report a backlight and which our heuristics
diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
index e45285d..da57270 100644
--- a/drivers/acpi/x86/utils.c
+++ b/drivers/acpi/x86/utils.c
@@ -251,6 +251,7 @@ bool force_storage_d3(void)
#define ACPI_QUIRK_UART1_TTY_UART2_SKIP BIT(1)
#define ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY BIT(2)
#define ACPI_QUIRK_USE_ACPI_AC_AND_BATTERY BIT(3)
+#define ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS BIT(4)
static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
/*
@@ -280,13 +281,35 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
*/
#if IS_ENABLED(CONFIG_X86_ANDROID_TABLETS)
{
+ /* Acer Iconia One 7 B1-750 */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Insyde"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "VESPA2"),
+ },
+ .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
+ ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY |
+ ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS),
+ },
+ {
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ME176C"),
},
.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
ACPI_QUIRK_UART1_TTY_UART2_SKIP |
- ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
+ ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY |
+ ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS),
+ },
+ {
+ /* Lenovo Yoga Book X90F/L */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "YETI-11"),
+ },
+ .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
+ ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY |
+ ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS),
},
{
.matches = {
@@ -294,7 +317,8 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "TF103C"),
},
.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
- ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
+ ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY |
+ ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS),
},
{
/* Lenovo Yoga Tablet 2 1050F/L */
@@ -336,7 +360,8 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "M890BAP"),
},
.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
- ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
+ ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY |
+ ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS),
},
{
/* Whitelabel (sold as various brands) TM800A550L */
@@ -413,6 +438,20 @@ int acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *s
return 0;
}
EXPORT_SYMBOL_GPL(acpi_quirk_skip_serdev_enumeration);
+
+bool acpi_quirk_skip_gpio_event_handlers(void)
+{
+ const struct dmi_system_id *dmi_id;
+ long quirks;
+
+ dmi_id = dmi_first_match(acpi_quirk_skip_dmi_ids);
+ if (!dmi_id)
+ return false;
+
+ quirks = (unsigned long)dmi_id->driver_data;
+ return (quirks & ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS);
+}
+EXPORT_SYMBOL_GPL(acpi_quirk_skip_gpio_event_handlers);
#endif
/* Lists of PMIC ACPI HIDs with an (often better) native charger driver */
diff --git a/drivers/ata/pata_parport/pata_parport.c b/drivers/ata/pata_parport/pata_parport.c
index 294a266..c1576d9 100644
--- a/drivers/ata/pata_parport/pata_parport.c
+++ b/drivers/ata/pata_parport/pata_parport.c
@@ -381,6 +381,7 @@ static void pata_parport_dev_release(struct device *dev)
{
struct pi_adapter *pi = container_of(dev, struct pi_adapter, dev);
+ ida_free(&pata_parport_bus_dev_ids, dev->id);
kfree(pi);
}
@@ -433,23 +434,27 @@ static struct pi_adapter *pi_init_one(struct parport *parport,
if (bus_for_each_dev(&pata_parport_bus_type, NULL, &match, pi_find_dev))
return NULL;
- pi = kzalloc(sizeof(struct pi_adapter), GFP_KERNEL);
- if (!pi)
+ id = ida_alloc(&pata_parport_bus_dev_ids, GFP_KERNEL);
+ if (id < 0)
return NULL;
+ pi = kzalloc(sizeof(struct pi_adapter), GFP_KERNEL);
+ if (!pi) {
+ ida_free(&pata_parport_bus_dev_ids, id);
+ return NULL;
+ }
+
/* set up pi->dev before pi_probe_unit() so it can use dev_printk() */
pi->dev.parent = &pata_parport_bus;
pi->dev.bus = &pata_parport_bus_type;
pi->dev.driver = &pr->driver;
pi->dev.release = pata_parport_dev_release;
- id = ida_alloc(&pata_parport_bus_dev_ids, GFP_KERNEL);
- if (id < 0)
- return NULL; /* pata_parport_dev_release will do kfree(pi) */
pi->dev.id = id;
dev_set_name(&pi->dev, "pata_parport.%u", pi->dev.id);
if (device_register(&pi->dev)) {
put_device(&pi->dev);
- goto out_ida_free;
+ /* pata_parport_dev_release will do ida_free(dev->id) and kfree(pi) */
+ return NULL;
}
pi->proto = pr;
@@ -464,8 +469,7 @@ static struct pi_adapter *pi_init_one(struct parport *parport,
pi->port = parport->base;
par_cb.private = pi;
- pi->pardev = parport_register_dev_model(parport, DRV_NAME, &par_cb,
- pi->dev.id);
+ pi->pardev = parport_register_dev_model(parport, DRV_NAME, &par_cb, id);
if (!pi->pardev)
goto out_module_put;
@@ -487,12 +491,13 @@ static struct pi_adapter *pi_init_one(struct parport *parport,
pi_connect(pi);
if (ata_host_activate(host, 0, NULL, 0, &pata_parport_sht))
- goto out_unreg_parport;
+ goto out_disconnect;
return pi;
-out_unreg_parport:
+out_disconnect:
pi_disconnect(pi);
+out_unreg_parport:
parport_unregister_device(pi->pardev);
if (pi->proto->release_proto)
pi->proto->release_proto(pi);
@@ -500,8 +505,7 @@ static struct pi_adapter *pi_init_one(struct parport *parport,
module_put(pi->proto->owner);
out_unreg_dev:
device_unregister(&pi->dev);
-out_ida_free:
- ida_free(&pata_parport_bus_dev_ids, pi->dev.id);
+ /* pata_parport_dev_release will do ida_free(dev->id) and kfree(pi) */
return NULL;
}
@@ -626,8 +630,7 @@ static void pi_remove_one(struct device *dev)
pi_disconnect(pi);
pi_release(pi);
device_unregister(dev);
- ida_free(&pata_parport_bus_dev_ids, dev->id);
- /* pata_parport_dev_release will do kfree(pi) */
+ /* pata_parport_dev_release will do ida_free(dev->id) and kfree(pi) */
}
static ssize_t delete_device_store(struct bus_type *bus, const char *buf,
@@ -643,6 +646,7 @@ static ssize_t delete_device_store(struct bus_type *bus, const char *buf,
}
pi_remove_one(dev);
+ put_device(dev);
mutex_unlock(&pi_mutex);
return count;
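The pata_parport rework above makes the ->release() callback the single owner of both the IDA id and the allocation, so every put_device() path cleans up exactly once. The same shape in general form; apart from the driver-model and IDA calls, all names below are hypothetical:

static DEFINE_IDA(my_dev_ids);

struct my_adapter {
	struct device dev;
};

static void mydev_release(struct device *dev)
{
	struct my_adapter *a = container_of(dev, struct my_adapter, dev);

	ida_free(&my_dev_ids, dev->id);	/* release() owns the id ... */
	kfree(a);			/* ... and the memory */
}

static int mydev_add(struct my_adapter *a, int id)
{
	a->dev.id = id;
	a->dev.release = mydev_release;

	if (device_register(&a->dev)) {
		/* drops the last reference; mydev_release() does the cleanup */
		put_device(&a->dev);
		return -ENODEV;
	}
	return 0;
}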
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index eec0cc2..e327a022 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -2909,6 +2909,7 @@ close_card_oam(struct idt77252_dev *card)
recycle_rx_pool_skb(card, &vc->rcv.rx_pool);
}
+ kfree(vc);
}
}
}
@@ -2952,6 +2953,15 @@ open_card_ubr0(struct idt77252_dev *card)
return 0;
}
+static void
+close_card_ubr0(struct idt77252_dev *card)
+{
+ struct vc_map *vc = card->vcs[0];
+
+ free_scq(card, vc->scq);
+ kfree(vc);
+}
+
static int
idt77252_dev_open(struct idt77252_dev *card)
{
@@ -3001,6 +3011,7 @@ static void idt77252_dev_close(struct atm_dev *dev)
struct idt77252_dev *card = dev->dev_data;
u32 conf;
+ close_card_ubr0(card);
close_card_oam(card);
conf = SAR_CFG_RXPTH | /* enable receive path */
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 8393734..28eb59f 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1859,35 +1859,44 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
static void loop_handle_cmd(struct loop_cmd *cmd)
{
+ struct cgroup_subsys_state *cmd_blkcg_css = cmd->blkcg_css;
+ struct cgroup_subsys_state *cmd_memcg_css = cmd->memcg_css;
struct request *rq = blk_mq_rq_from_pdu(cmd);
const bool write = op_is_write(req_op(rq));
struct loop_device *lo = rq->q->queuedata;
int ret = 0;
struct mem_cgroup *old_memcg = NULL;
+ const bool use_aio = cmd->use_aio;
if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) {
ret = -EIO;
goto failed;
}
- if (cmd->blkcg_css)
- kthread_associate_blkcg(cmd->blkcg_css);
- if (cmd->memcg_css)
+ if (cmd_blkcg_css)
+ kthread_associate_blkcg(cmd_blkcg_css);
+ if (cmd_memcg_css)
old_memcg = set_active_memcg(
- mem_cgroup_from_css(cmd->memcg_css));
+ mem_cgroup_from_css(cmd_memcg_css));
+ /*
+ * do_req_filebacked() may call blk_mq_complete_request() synchronously
+ * or asynchronously if using aio. Hence, do not touch 'cmd' after
+ * do_req_filebacked() has returned unless we are sure that 'cmd' has
+ * not yet been completed.
+ */
ret = do_req_filebacked(lo, rq);
- if (cmd->blkcg_css)
+ if (cmd_blkcg_css)
kthread_associate_blkcg(NULL);
- if (cmd->memcg_css) {
+ if (cmd_memcg_css) {
set_active_memcg(old_memcg);
- css_put(cmd->memcg_css);
+ css_put(cmd_memcg_css);
}
failed:
/* complete non-aio request */
- if (!cmd->use_aio || ret) {
+ if (!use_aio || ret) {
if (ret == -EOPNOTSUPP)
cmd->ret = ret;
else
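A compact restatement of the rule the new loop.c comment spells out: copy everything you will need after do_req_filebacked() into locals first, because the command may already have been completed (and its memory reused) by the time the call returns. Apart from that rule, everything below is hypothetical:

static void handle_cmd_sketch(struct my_cmd *cmd)
{
	const bool use_aio = cmd->use_aio;	/* snapshot before submitting */

	submit_may_complete_async(cmd);		/* 'cmd' may be gone on return */

	if (!use_aio)				/* decided from the local copy */
		complete_sync(cmd);		/* only valid in the sync case */
}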
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 4c601ca..9e6b032 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1413,8 +1413,7 @@ static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
case NULL_IRQ_SOFTIRQ:
switch (cmd->nq->dev->queue_mode) {
case NULL_Q_MQ:
- if (likely(!blk_should_fake_timeout(cmd->rq->q)))
- blk_mq_complete_request(cmd->rq);
+ blk_mq_complete_request(cmd->rq);
break;
case NULL_Q_BIO:
/*
@@ -1658,12 +1657,13 @@ static enum blk_eh_timer_return null_timeout_rq(struct request *rq)
}
static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
- const struct blk_mq_queue_data *bd)
+ const struct blk_mq_queue_data *bd)
{
- struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
+ struct request *rq = bd->rq;
+ struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
struct nullb_queue *nq = hctx->driver_data;
- sector_t nr_sectors = blk_rq_sectors(bd->rq);
- sector_t sector = blk_rq_pos(bd->rq);
+ sector_t nr_sectors = blk_rq_sectors(rq);
+ sector_t sector = blk_rq_pos(rq);
const bool is_poll = hctx->type == HCTX_TYPE_POLL;
might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
@@ -1672,14 +1672,15 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
cmd->timer.function = null_cmd_timer_expired;
}
- cmd->rq = bd->rq;
+ cmd->rq = rq;
cmd->error = BLK_STS_OK;
cmd->nq = nq;
- cmd->fake_timeout = should_timeout_request(bd->rq);
+ cmd->fake_timeout = should_timeout_request(rq) ||
+ blk_should_fake_timeout(rq->q);
- blk_mq_start_request(bd->rq);
+ blk_mq_start_request(rq);
- if (should_requeue_request(bd->rq)) {
+ if (should_requeue_request(rq)) {
/*
* Alternate between hitting the core BUSY path, and the
* driver driven requeue path
@@ -1687,22 +1688,20 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
nq->requeue_selection++;
if (nq->requeue_selection & 1)
return BLK_STS_RESOURCE;
- else {
- blk_mq_requeue_request(bd->rq, true);
- return BLK_STS_OK;
- }
+ blk_mq_requeue_request(rq, true);
+ return BLK_STS_OK;
}
if (is_poll) {
spin_lock(&nq->poll_lock);
- list_add_tail(&bd->rq->queuelist, &nq->poll_list);
+ list_add_tail(&rq->queuelist, &nq->poll_list);
spin_unlock(&nq->poll_lock);
return BLK_STS_OK;
}
if (cmd->fake_timeout)
return BLK_STS_OK;
- return null_handle_cmd(cmd, sector, nr_sectors, req_op(bd->rq));
+ return null_handle_cmd(cmd, sector, nr_sectors, req_op(rq));
}
static void cleanup_queue(struct nullb_queue *nq)
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index fb855da..9fa821f 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -972,6 +972,8 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
print_version();
hp = mdesc_grab();
+ if (!hp)
+ return -ENODEV;
err = -ENODEV;
if ((vdev->dev_no << PARTITION_SHIFT) & ~(u64)MINORMASK) {
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index d1d1c8d..c73cc57 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -715,7 +715,8 @@ static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
}
}
-static void ubq_complete_io_cmd(struct ublk_io *io, int res)
+static void ubq_complete_io_cmd(struct ublk_io *io, int res,
+ unsigned issue_flags)
{
/* mark this cmd owned by ublksrv */
io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV;
@@ -727,7 +728,7 @@ static void ubq_complete_io_cmd(struct ublk_io *io, int res)
io->flags &= ~UBLK_IO_FLAG_ACTIVE;
/* tell ublksrv one io request is coming */
- io_uring_cmd_done(io->cmd, res, 0);
+ io_uring_cmd_done(io->cmd, res, 0, issue_flags);
}
#define UBLK_REQUEUE_DELAY_MS 3
@@ -744,7 +745,8 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
mod_delayed_work(system_wq, &ubq->dev->monitor_work, 0);
}
-static inline void __ublk_rq_task_work(struct request *req)
+static inline void __ublk_rq_task_work(struct request *req,
+ unsigned issue_flags)
{
struct ublk_queue *ubq = req->mq_hctx->driver_data;
int tag = req->tag;
@@ -782,7 +784,7 @@ static inline void __ublk_rq_task_work(struct request *req)
pr_devel("%s: need get data. op %d, qid %d tag %d io_flags %x\n",
__func__, io->cmd->cmd_op, ubq->q_id,
req->tag, io->flags);
- ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA);
+ ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA, issue_flags);
return;
}
/*
@@ -820,17 +822,18 @@ static inline void __ublk_rq_task_work(struct request *req)
mapped_bytes >> 9;
}
- ubq_complete_io_cmd(io, UBLK_IO_RES_OK);
+ ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags);
}
-static inline void ublk_forward_io_cmds(struct ublk_queue *ubq)
+static inline void ublk_forward_io_cmds(struct ublk_queue *ubq,
+ unsigned issue_flags)
{
struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
struct ublk_rq_data *data, *tmp;
io_cmds = llist_reverse_order(io_cmds);
llist_for_each_entry_safe(data, tmp, io_cmds, node)
- __ublk_rq_task_work(blk_mq_rq_from_pdu(data));
+ __ublk_rq_task_work(blk_mq_rq_from_pdu(data), issue_flags);
}
static inline void ublk_abort_io_cmds(struct ublk_queue *ubq)
@@ -842,12 +845,12 @@ static inline void ublk_abort_io_cmds(struct ublk_queue *ubq)
__ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data));
}
-static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd)
+static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags)
{
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
struct ublk_queue *ubq = pdu->ubq;
- ublk_forward_io_cmds(ubq);
+ ublk_forward_io_cmds(ubq, issue_flags);
}
static void ublk_rq_task_work_fn(struct callback_head *work)
@@ -856,8 +859,9 @@ static void ublk_rq_task_work_fn(struct callback_head *work)
struct ublk_rq_data, work);
struct request *req = blk_mq_rq_from_pdu(data);
struct ublk_queue *ubq = req->mq_hctx->driver_data;
+ unsigned issue_flags = IO_URING_F_UNLOCKED;
- ublk_forward_io_cmds(ubq);
+ ublk_forward_io_cmds(ubq, issue_flags);
}
static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
@@ -1111,7 +1115,8 @@ static void ublk_cancel_queue(struct ublk_queue *ubq)
struct ublk_io *io = &ubq->ios[i];
if (io->flags & UBLK_IO_FLAG_ACTIVE)
- io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0);
+ io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0,
+ IO_URING_F_UNLOCKED);
}
/* all io commands are canceled */
@@ -1351,7 +1356,7 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
return -EIOCBQUEUED;
out:
- io_uring_cmd_done(cmd, ret, 0);
+ io_uring_cmd_done(cmd, ret, 0, issue_flags);
pr_devel("%s: complete: cmd op %d, tag %d ret %x io_flags %x\n",
__func__, cmd_op, tag, ret, io->flags);
return -EIOCBQUEUED;
@@ -1602,17 +1607,18 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
set_bit(GD_SUPPRESS_PART_SCAN, &disk->state);
get_device(&ub->cdev_dev);
+ ub->dev_info.state = UBLK_S_DEV_LIVE;
ret = add_disk(disk);
if (ret) {
/*
* Has to drop the reference since ->free_disk won't be
* called in case of add_disk failure.
*/
+ ub->dev_info.state = UBLK_S_DEV_DEAD;
ublk_put_device(ub);
goto out_put_disk;
}
set_bit(UB_STATE_USED, &ub->state);
- ub->dev_info.state = UBLK_S_DEV_LIVE;
out_put_disk:
if (ret)
put_disk(disk);
@@ -2233,7 +2239,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
if (ub)
ublk_put_device(ub);
out:
- io_uring_cmd_done(cmd, ret, 0);
+ io_uring_cmd_done(cmd, ret, 0, issue_flags);
pr_devel("%s: cmd done ret %d cmd_op %x, dev id %d qid %d\n",
__func__, ret, cmd->cmd_op, header->dev_id, header->queue_id);
return -EIOCBQUEUED;
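The ublk changes above thread the issue_flags that ->uring_cmd() receives down into every io_uring_cmd_done() call. A minimal handler of the same shape; only the four-argument io_uring_cmd_done() signature is taken from the hunks, the rest is hypothetical:

static int mydrv_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
	int ret = mydrv_dispatch(cmd);		/* hypothetical helper */

	if (ret == -EIOCBQUEUED)
		return ret;			/* completed later, elsewhere */

	io_uring_cmd_done(cmd, ret, 0, issue_flags);
	return -EIOCBQUEUED;
}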
diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c
index bede8b0..af77468 100644
--- a/drivers/bluetooth/btintel.c
+++ b/drivers/bluetooth/btintel.c
@@ -26,7 +26,14 @@
#define ECDSA_HEADER_LEN 320
#define BTINTEL_PPAG_NAME "PPAG"
-#define BTINTEL_PPAG_PREFIX "\\_SB_.PCI0.XHCI.RHUB"
+
+/* structure to store the PPAG data read from ACPI table */
+struct btintel_ppag {
+ u32 domain;
+ u32 mode;
+ acpi_status status;
+ struct hci_dev *hdev;
+};
#define CMD_WRITE_BOOT_PARAMS 0xfc0e
struct cmd_write_boot_params {
@@ -1295,17 +1302,16 @@ static acpi_status btintel_ppag_callback(acpi_handle handle, u32 lvl, void *data
status = acpi_get_name(handle, ACPI_FULL_PATHNAME, &string);
if (ACPI_FAILURE(status)) {
- bt_dev_warn(hdev, "ACPI Failure: %s", acpi_format_exception(status));
+ bt_dev_warn(hdev, "PPAG-BT: ACPI Failure: %s", acpi_format_exception(status));
return status;
}
- if (strncmp(BTINTEL_PPAG_PREFIX, string.pointer,
- strlen(BTINTEL_PPAG_PREFIX))) {
+ len = strlen(string.pointer);
+ if (len < strlen(BTINTEL_PPAG_NAME)) {
kfree(string.pointer);
return AE_OK;
}
- len = strlen(string.pointer);
if (strncmp((char *)string.pointer + len - 4, BTINTEL_PPAG_NAME, 4)) {
kfree(string.pointer);
return AE_OK;
@@ -1314,7 +1320,8 @@ static acpi_status btintel_ppag_callback(acpi_handle handle, u32 lvl, void *data
status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
if (ACPI_FAILURE(status)) {
- bt_dev_warn(hdev, "ACPI Failure: %s", acpi_format_exception(status));
+ ppag->status = status;
+ bt_dev_warn(hdev, "PPAG-BT: ACPI Failure: %s", acpi_format_exception(status));
return status;
}
@@ -1323,8 +1330,9 @@ static acpi_status btintel_ppag_callback(acpi_handle handle, u32 lvl, void *data
if (p->type != ACPI_TYPE_PACKAGE || p->package.count != 2) {
kfree(buffer.pointer);
- bt_dev_warn(hdev, "Invalid object type: %d or package count: %d",
+ bt_dev_warn(hdev, "PPAG-BT: Invalid object type: %d or package count: %d",
p->type, p->package.count);
+ ppag->status = AE_ERROR;
return AE_ERROR;
}
@@ -1335,6 +1343,7 @@ static acpi_status btintel_ppag_callback(acpi_handle handle, u32 lvl, void *data
ppag->domain = (u32)p->package.elements[0].integer.value;
ppag->mode = (u32)p->package.elements[1].integer.value;
+ ppag->status = AE_OK;
kfree(buffer.pointer);
return AE_CTRL_TERMINATE;
}
@@ -2314,12 +2323,12 @@ static int btintel_configure_offload(struct hci_dev *hdev)
static void btintel_set_ppag(struct hci_dev *hdev, struct intel_version_tlv *ver)
{
- acpi_status status;
struct btintel_ppag ppag;
struct sk_buff *skb;
struct btintel_loc_aware_reg ppag_cmd;
+ acpi_handle handle;
- /* PPAG is not supported if CRF is HrP2, Jfp2, JfP1 */
+ /* PPAG is not supported if CRF is HrP2, Jfp2, JfP1 */
switch (ver->cnvr_top & 0xFFF) {
case 0x504: /* Hrp2 */
case 0x202: /* Jfp2 */
@@ -2327,29 +2336,35 @@ static void btintel_set_ppag(struct hci_dev *hdev, struct intel_version_tlv *ver
return;
}
+ handle = ACPI_HANDLE(GET_HCIDEV_DEV(hdev));
+ if (!handle) {
+ bt_dev_info(hdev, "No support for BT device in ACPI firmware");
+ return;
+ }
+
memset(&ppag, 0, sizeof(ppag));
ppag.hdev = hdev;
- status = acpi_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT,
- ACPI_UINT32_MAX, NULL,
- btintel_ppag_callback, &ppag, NULL);
+ ppag.status = AE_NOT_FOUND;
+ acpi_walk_namespace(ACPI_TYPE_PACKAGE, handle, 1, NULL,
+ btintel_ppag_callback, &ppag, NULL);
- if (ACPI_FAILURE(status)) {
- /* Do not log warning message if ACPI entry is not found */
- if (status == AE_NOT_FOUND)
+ if (ACPI_FAILURE(ppag.status)) {
+ if (ppag.status == AE_NOT_FOUND) {
+ bt_dev_dbg(hdev, "PPAG-BT: ACPI entry not found");
return;
- bt_dev_warn(hdev, "PPAG: ACPI Failure: %s", acpi_format_exception(status));
+ }
return;
}
if (ppag.domain != 0x12) {
- bt_dev_warn(hdev, "PPAG-BT Domain disabled");
+ bt_dev_warn(hdev, "PPAG-BT: domain is not bluetooth");
return;
}
/* PPAG mode, BIT0 = 0 Disabled, BIT0 = 1 Enabled */
if (!(ppag.mode & BIT(0))) {
- bt_dev_dbg(hdev, "PPAG disabled");
+ bt_dev_dbg(hdev, "PPAG-BT: disabled");
return;
}
diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h
index 8e7da87..8fdb65b 100644
--- a/drivers/bluetooth/btintel.h
+++ b/drivers/bluetooth/btintel.h
@@ -137,13 +137,6 @@ struct intel_offload_use_cases {
__u8 preset[8];
} __packed;
-/* structure to store the PPAG data read from ACPI table */
-struct btintel_ppag {
- u32 domain;
- u32 mode;
- struct hci_dev *hdev;
-};
-
struct btintel_loc_aware_reg {
__le32 mcc;
__le32 sel;
diff --git a/drivers/bluetooth/btqcomsmd.c b/drivers/bluetooth/btqcomsmd.c
index 2acb719..11c7e04 100644
--- a/drivers/bluetooth/btqcomsmd.c
+++ b/drivers/bluetooth/btqcomsmd.c
@@ -122,6 +122,21 @@ static int btqcomsmd_setup(struct hci_dev *hdev)
return 0;
}
+static int btqcomsmd_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr)
+{
+ int ret;
+
+ ret = qca_set_bdaddr_rome(hdev, bdaddr);
+ if (ret)
+ return ret;
+
+ /* The firmware stops responding for a while after setting the bdaddr,
+ * causing timeouts for subsequent commands. Sleep a bit to avoid this.
+ */
+ usleep_range(1000, 10000);
+ return 0;
+}
+
static int btqcomsmd_probe(struct platform_device *pdev)
{
struct btqcomsmd *btq;
@@ -162,7 +177,7 @@ static int btqcomsmd_probe(struct platform_device *pdev)
hdev->close = btqcomsmd_close;
hdev->send = btqcomsmd_send;
hdev->setup = btqcomsmd_setup;
- hdev->set_bdaddr = qca_set_bdaddr_rome;
+ hdev->set_bdaddr = btqcomsmd_set_bdaddr;
ret = hci_register_dev(hdev);
if (ret < 0)
diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c
index 795be33..0289360 100644
--- a/drivers/bluetooth/btsdio.c
+++ b/drivers/bluetooth/btsdio.c
@@ -354,6 +354,7 @@ static void btsdio_remove(struct sdio_func *func)
BT_DBG("func %p", func);
if (!data)
return;
+ cancel_work_sync(&data->work);
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 18bc947..5c53615 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -1050,21 +1050,11 @@ static int btusb_recv_bulk(struct btusb_data *data, void *buffer, int count)
hci_skb_expect(skb) -= len;
if (skb->len == HCI_ACL_HDR_SIZE) {
- __u16 handle = __le16_to_cpu(hci_acl_hdr(skb)->handle);
__le16 dlen = hci_acl_hdr(skb)->dlen;
- __u8 type;
/* Complete ACL header */
hci_skb_expect(skb) = __le16_to_cpu(dlen);
- /* Detect if ISO packet has been sent over bulk */
- if (hci_conn_num(data->hdev, ISO_LINK)) {
- type = hci_conn_lookup_type(data->hdev,
- hci_handle(handle));
- if (type == ISO_LINK)
- hci_skb_pkt_type(skb) = HCI_ISODATA_PKT;
- }
-
if (skb_tailroom(skb) < hci_skb_expect(skb)) {
kfree_skb(skb);
skb = NULL;
diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c
index 2a6b4f6..36d4248 100644
--- a/drivers/bus/imx-weim.c
+++ b/drivers/bus/imx-weim.c
@@ -204,8 +204,8 @@ static int weim_parse_dt(struct platform_device *pdev)
const struct of_device_id *of_id = of_match_device(weim_id_table,
&pdev->dev);
const struct imx_weim_devtype *devtype = of_id->data;
+ int ret = 0, have_child = 0;
struct device_node *child;
- int ret, have_child = 0;
struct weim_priv *priv;
void __iomem *base;
u32 reg;
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index b6c5bf6..1eef05b 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -91,7 +91,7 @@
config COMMON_CLK_HI655X
tristate "Clock driver for Hi655x" if EXPERT
depends on (MFD_HI655X_PMIC || COMPILE_TEST)
- depends on REGMAP
+ select REGMAP
default MFD_HI655X_PMIC
help
This driver supports the hi655x PMIC clock. This
diff --git a/drivers/clk/bcm/clk-bcm2835-aux.c b/drivers/clk/bcm/clk-bcm2835-aux.c
index 290a284..0fafa5c 100644
--- a/drivers/clk/bcm/clk-bcm2835-aux.c
+++ b/drivers/clk/bcm/clk-bcm2835-aux.c
@@ -69,4 +69,3 @@ builtin_platform_driver(bcm2835_aux_clk_driver);
MODULE_AUTHOR("Eric Anholt <eric@anholt.net>");
MODULE_DESCRIPTION("BCM2835 auxiliary peripheral clock driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c
index e74fe62..8dc476e 100644
--- a/drivers/clk/bcm/clk-bcm2835.c
+++ b/drivers/clk/bcm/clk-bcm2835.c
@@ -2350,4 +2350,3 @@ builtin_platform_driver(bcm2835_clk_driver);
MODULE_AUTHOR("Eric Anholt <eric@anholt.net>");
MODULE_DESCRIPTION("BCM2835 clock driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/clk/clk-fixed-mmio.c b/drivers/clk/clk-fixed-mmio.c
index 5225d17..8609fca 100644
--- a/drivers/clk/clk-fixed-mmio.c
+++ b/drivers/clk/clk-fixed-mmio.c
@@ -99,4 +99,3 @@ module_platform_driver(of_fixed_mmio_clk_driver);
MODULE_AUTHOR("Jan Kotas <jank@cadence.com>");
MODULE_DESCRIPTION("Memory Mapped IO Fixed clock driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/clk-fsl-sai.c b/drivers/clk/clk-fsl-sai.c
index 6238fce..ee5baf9 100644
--- a/drivers/clk/clk-fsl-sai.c
+++ b/drivers/clk/clk-fsl-sai.c
@@ -88,5 +88,4 @@ module_platform_driver(fsl_sai_clk_driver);
MODULE_DESCRIPTION("Freescale SAI bitclock-as-a-clock driver");
MODULE_AUTHOR("Michael Walle <michael@walle.cc>");
-MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:fsl-sai-clk");
diff --git a/drivers/clk/clk-k210.c b/drivers/clk/clk-k210.c
index 67a7cb3..4eed667 100644
--- a/drivers/clk/clk-k210.c
+++ b/drivers/clk/clk-k210.c
@@ -495,7 +495,7 @@ static unsigned long k210_pll_get_rate(struct clk_hw *hw,
f = FIELD_GET(K210_PLL_CLKF, reg) + 1;
od = FIELD_GET(K210_PLL_CLKOD, reg) + 1;
- return (u64)parent_rate * f / (r * od);
+ return div_u64((u64)parent_rate * f, r * od);
}
static const struct clk_ops k210_pll_ops = {
diff --git a/drivers/clk/hisilicon/clk-hi3559a.c b/drivers/clk/hisilicon/clk-hi3559a.c
index 9ea1a80..8036bd8 100644
--- a/drivers/clk/hisilicon/clk-hi3559a.c
+++ b/drivers/clk/hisilicon/clk-hi3559a.c
@@ -841,5 +841,4 @@ static void __exit hi3559av100_crg_exit(void)
module_exit(hi3559av100_crg_exit);
-MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("HiSilicon Hi3559AV100 CRG Driver");
diff --git a/drivers/clk/microchip/clk-mpfs-ccc.c b/drivers/clk/microchip/clk-mpfs-ccc.c
index 0ddc73e..bce61c4 100644
--- a/drivers/clk/microchip/clk-mpfs-ccc.c
+++ b/drivers/clk/microchip/clk-mpfs-ccc.c
@@ -291,4 +291,3 @@ module_exit(clk_ccc_exit);
MODULE_DESCRIPTION("Microchip PolarFire SoC Clock Conditioning Circuitry Driver");
MODULE_AUTHOR("Conor Dooley <conor.dooley@microchip.com>");
-MODULE_LICENSE("GPL");
diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c
index 6ad2954..11316c3 100644
--- a/drivers/cpuidle/cpuidle-psci-domain.c
+++ b/drivers/cpuidle/cpuidle-psci-domain.c
@@ -106,7 +106,8 @@ static void psci_pd_remove(void)
struct psci_pd_provider *pd_provider, *it;
struct generic_pm_domain *genpd;
- list_for_each_entry_safe(pd_provider, it, &psci_pd_providers, link) {
+ list_for_each_entry_safe_reverse(pd_provider, it,
+ &psci_pd_providers, link) {
of_genpd_del_provider(pd_provider->node);
genpd = of_genpd_remove_last(pd_provider->node);
diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c
index 73140b8..c15928b 100644
--- a/drivers/firmware/arm_scmi/bus.c
+++ b/drivers/firmware/arm_scmi/bus.c
@@ -14,7 +14,6 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/device.h>
-#include <linux/of.h>
#include "common.h"
@@ -436,7 +435,7 @@ struct scmi_device *scmi_device_create(struct device_node *np,
/* Nothing to do. */
if (!phead) {
mutex_unlock(&scmi_requested_devices_mtx);
- return scmi_dev;
+ return NULL;
}
/* Walk the list of requested devices for protocol and create them */
diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c
index d21c7ea..dbc474f 100644
--- a/drivers/firmware/arm_scmi/driver.c
+++ b/drivers/firmware/arm_scmi/driver.c
@@ -2221,8 +2221,8 @@ static int __scmi_xfer_info_init(struct scmi_info *sinfo,
hash_init(info->pending_xfers);
/* Allocate a bitmask sized to hold MSG_TOKEN_MAX tokens */
- info->xfer_alloc_table = devm_kcalloc(dev, BITS_TO_LONGS(MSG_TOKEN_MAX),
- sizeof(long), GFP_KERNEL);
+ info->xfer_alloc_table = devm_bitmap_zalloc(dev, MSG_TOKEN_MAX,
+ GFP_KERNEL);
if (!info->xfer_alloc_table)
return -ENOMEM;
@@ -2657,6 +2657,7 @@ static int scmi_probe(struct platform_device *pdev)
struct scmi_handle *handle;
const struct scmi_desc *desc;
struct scmi_info *info;
+ bool coex = IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT_COEX);
struct device *dev = &pdev->dev;
struct device_node *child, *np = dev->of_node;
@@ -2731,16 +2732,13 @@ static int scmi_probe(struct platform_device *pdev)
dev_warn(dev, "Failed to setup SCMI debugfs.\n");
if (IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT)) {
- bool coex =
- IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT_COEX);
-
ret = scmi_debugfs_raw_mode_setup(info);
if (!coex) {
if (ret)
goto clear_dev_req_notifier;
- /* Bail out anyway when coex enabled */
- return ret;
+ /* Bail out anyway when coex disabled. */
+ return 0;
}
/* Coex enabled, carry on in any case. */
@@ -2764,6 +2762,8 @@ static int scmi_probe(struct platform_device *pdev)
ret = scmi_protocol_acquire(handle, SCMI_PROTOCOL_BASE);
if (ret) {
dev_err(dev, "unable to communicate with SCMI\n");
+ if (coex)
+ return 0;
goto notification_exit;
}
diff --git a/drivers/firmware/arm_scmi/mailbox.c b/drivers/firmware/arm_scmi/mailbox.c
index 0d9c953..112c285 100644
--- a/drivers/firmware/arm_scmi/mailbox.c
+++ b/drivers/firmware/arm_scmi/mailbox.c
@@ -52,6 +52,39 @@ static bool mailbox_chan_available(struct device_node *of_node, int idx)
"#mbox-cells", idx, NULL);
}
+static int mailbox_chan_validate(struct device *cdev)
+{
+ int num_mb, num_sh, ret = 0;
+ struct device_node *np = cdev->of_node;
+
+ num_mb = of_count_phandle_with_args(np, "mboxes", "#mbox-cells");
+ num_sh = of_count_phandle_with_args(np, "shmem", NULL);
+ /* Bail out if mboxes and shmem descriptors are inconsistent */
+ if (num_mb <= 0 || num_sh > 2 || num_mb != num_sh) {
+ dev_warn(cdev, "Invalid channel descriptor for '%s'\n",
+ of_node_full_name(np));
+ return -EINVAL;
+ }
+
+ if (num_sh > 1) {
+ struct device_node *np_tx, *np_rx;
+
+ np_tx = of_parse_phandle(np, "shmem", 0);
+ np_rx = of_parse_phandle(np, "shmem", 1);
+ /* SCMI Tx and Rx shared mem areas have to be distinct */
+ if (!np_tx || !np_rx || np_tx == np_rx) {
+ dev_warn(cdev, "Invalid shmem descriptor for '%s'\n",
+ of_node_full_name(np));
+ ret = -EINVAL;
+ }
+
+ of_node_put(np_tx);
+ of_node_put(np_rx);
+ }
+
+ return ret;
+}
+
static int mailbox_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
bool tx)
{
@@ -64,6 +97,10 @@ static int mailbox_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
resource_size_t size;
struct resource res;
+ ret = mailbox_chan_validate(cdev);
+ if (ret)
+ return ret;
+
smbox = devm_kzalloc(dev, sizeof(*smbox), GFP_KERNEL);
if (!smbox)
return -ENOMEM;
diff --git a/drivers/firmware/efi/earlycon.c b/drivers/firmware/efi/earlycon.c
index f54e6fd..f80a9af 100644
--- a/drivers/firmware/efi/earlycon.c
+++ b/drivers/firmware/efi/earlycon.c
@@ -215,6 +215,14 @@ efi_earlycon_write(struct console *con, const char *str, unsigned int num)
}
}
+static bool __initdata fb_probed;
+
+void __init efi_earlycon_reprobe(void)
+{
+ if (fb_probed)
+ setup_earlycon("efifb");
+}
+
static int __init efi_earlycon_setup(struct earlycon_device *device,
const char *opt)
{
@@ -222,15 +230,17 @@ static int __init efi_earlycon_setup(struct earlycon_device *device,
u16 xres, yres;
u32 i;
- if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI)
+ fb_wb = opt && !strcmp(opt, "ram");
+
+ if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI) {
+ fb_probed = true;
return -ENODEV;
+ }
fb_base = screen_info.lfb_base;
if (screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE)
fb_base |= (u64)screen_info.ext_lfb_base << 32;
- fb_wb = opt && !strcmp(opt, "ram");
-
si = &screen_info;
xres = si->lfb_width;
yres = si->lfb_height;
diff --git a/drivers/firmware/efi/efi-init.c b/drivers/firmware/efi/efi-init.c
index 2c16080..ef0820f 100644
--- a/drivers/firmware/efi/efi-init.c
+++ b/drivers/firmware/efi/efi-init.c
@@ -72,6 +72,9 @@ static void __init init_screen_info(void)
if (memblock_is_map_memory(screen_info.lfb_base))
memblock_mark_nomap(screen_info.lfb_base,
screen_info.lfb_size);
+
+ if (IS_ENABLED(CONFIG_EFI_EARLYCON))
+ efi_earlycon_reprobe();
}
}
diff --git a/drivers/firmware/efi/libstub/Makefile.zboot b/drivers/firmware/efi/libstub/Makefile.zboot
index 43e9a4c..ccdd6a1 100644
--- a/drivers/firmware/efi/libstub/Makefile.zboot
+++ b/drivers/firmware/efi/libstub/Makefile.zboot
@@ -44,4 +44,4 @@
$(obj)/vmlinuz.efi: $(obj)/vmlinuz.efi.elf FORCE
$(call if_changed,objcopy)
-targets += zboot-header.o vmlinuz.o vmlinuz.efi.elf vmlinuz.efi
+targets += zboot-header.o vmlinuz vmlinuz.o vmlinuz.efi.elf vmlinuz.efi
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index d4a6b12..770b8ec 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -85,8 +85,10 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
}
}
- if (image->image_base != _text)
+ if (image->image_base != _text) {
efi_err("FIRMWARE BUG: efi_loaded_image_t::image_base has bogus value\n");
+ image->image_base = _text;
+ }
if (!IS_ALIGNED((u64)_text, SEGMENT_ALIGN))
efi_err("FIRMWARE BUG: kernel image not aligned on %dk boundary\n",
@@ -139,6 +141,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
*image_addr = *reserve_addr;
memcpy((void *)*image_addr, _text, kernel_size);
caches_clean_inval_pou(*image_addr, *image_addr + kernel_codesize);
+ efi_remap_image(*image_addr, *reserve_size, kernel_codesize);
return EFI_SUCCESS;
}
diff --git a/drivers/firmware/efi/libstub/arm64.c b/drivers/firmware/efi/libstub/arm64.c
index 3997702..8aad8c4 100644
--- a/drivers/firmware/efi/libstub/arm64.c
+++ b/drivers/firmware/efi/libstub/arm64.c
@@ -16,20 +16,43 @@
static bool system_needs_vamap(void)
{
- const u8 *type1_family = efi_get_smbios_string(1, family);
+ const struct efi_smbios_type4_record *record;
+ const u32 __aligned(1) *socid;
+ const u8 *version;
/*
* Ampere eMAG, Altra, and Altra Max machines crash in SetTime() if
- * SetVirtualAddressMap() has not been called prior.
+ * SetVirtualAddressMap() has not been called prior. Most Altra systems
+ * can be identified by the SMCCC soc ID, which is conveniently exposed
+ * via the type 4 SMBIOS records. Otherwise, test the processor version
+ * field. eMAG systems all appear to have the processor version field
+ * set to "eMAG".
*/
- if (!type1_family || (
- strcmp(type1_family, "eMAG") &&
- strcmp(type1_family, "Altra") &&
- strcmp(type1_family, "Altra Max")))
+ record = (struct efi_smbios_type4_record *)efi_get_smbios_record(4);
+ if (!record)
return false;
- efi_warn("Working around broken SetVirtualAddressMap()\n");
- return true;
+ socid = (u32 *)record->processor_id;
+ switch (*socid & 0xffff000f) {
+ static char const altra[] = "Ampere(TM) Altra(TM) Processor";
+ static char const emag[] = "eMAG";
+
+ default:
+ version = efi_get_smbios_string(&record->header, 4,
+ processor_version);
+ if (!version || (strncmp(version, altra, sizeof(altra) - 1) &&
+ strncmp(version, emag, sizeof(emag) - 1)))
+ break;
+
+ fallthrough;
+
+ case 0x0a160001: // Altra
+ case 0x0a160002: // Altra Max
+ efi_warn("Working around broken SetVirtualAddressMap()\n");
+ return true;
+ }
+
+ return false;
}
efi_status_t check_platform_features(void)
diff --git a/drivers/firmware/efi/libstub/efi-stub-entry.c b/drivers/firmware/efi/libstub/efi-stub-entry.c
index 5245c4f..cc4dcae 100644
--- a/drivers/firmware/efi/libstub/efi-stub-entry.c
+++ b/drivers/firmware/efi/libstub/efi-stub-entry.c
@@ -5,6 +5,15 @@
#include "efistub.h"
+static unsigned long screen_info_offset;
+
+struct screen_info *alloc_screen_info(void)
+{
+ if (IS_ENABLED(CONFIG_ARM))
+ return __alloc_screen_info();
+ return (void *)&screen_info + screen_info_offset;
+}
+
/*
* EFI entry point for the generic EFI stub used by ARM, arm64, RISC-V and
* LoongArch. This is the entrypoint that is described in the PE/COFF header
@@ -56,6 +65,8 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
return status;
}
+ screen_info_offset = image_addr - (unsigned long)image->image_base;
+
status = efi_stub_common(handle, image, image_addr, cmdline_ptr);
efi_free(image_size, image_addr);
diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c
index 2955c1a..f9c1e8a 100644
--- a/drivers/firmware/efi/libstub/efi-stub.c
+++ b/drivers/firmware/efi/libstub/efi-stub.c
@@ -47,11 +47,6 @@
static u64 virtmap_base = EFI_RT_VIRTUAL_BASE;
static bool flat_va_mapping = (EFI_RT_VIRTUAL_OFFSET != 0);
-struct screen_info * __weak alloc_screen_info(void)
-{
- return &screen_info;
-}
-
void __weak free_screen_info(struct screen_info *si)
{
}
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 6bd3bb8..148013b 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -1062,6 +1062,7 @@ efi_enable_reset_attack_mitigation(void) { }
void efi_retrieve_tpm2_eventlog(void);
struct screen_info *alloc_screen_info(void);
+struct screen_info *__alloc_screen_info(void);
void free_screen_info(struct screen_info *si);
void efi_cache_sync_image(unsigned long image_base,
@@ -1074,6 +1075,8 @@ struct efi_smbios_record {
u16 handle;
};
+const struct efi_smbios_record *efi_get_smbios_record(u8 type);
+
struct efi_smbios_type1_record {
struct efi_smbios_record header;
@@ -1087,14 +1090,46 @@ struct efi_smbios_type1_record {
u8 family;
};
-#define efi_get_smbios_string(__type, __name) ({ \
- int size = sizeof(struct efi_smbios_type ## __type ## _record); \
+struct efi_smbios_type4_record {
+ struct efi_smbios_record header;
+
+ u8 socket;
+ u8 processor_type;
+ u8 processor_family;
+ u8 processor_manufacturer;
+ u8 processor_id[8];
+ u8 processor_version;
+ u8 voltage;
+ u16 external_clock;
+ u16 max_speed;
+ u16 current_speed;
+ u8 status;
+ u8 processor_upgrade;
+ u16 l1_cache_handle;
+ u16 l2_cache_handle;
+ u16 l3_cache_handle;
+ u8 serial_number;
+ u8 asset_tag;
+ u8 part_number;
+ u8 core_count;
+ u8 enabled_core_count;
+ u8 thread_count;
+ u16 processor_characteristics;
+ u16 processor_family2;
+ u16 core_count2;
+ u16 enabled_core_count2;
+ u16 thread_count2;
+ u16 thread_enabled;
+};
+
+#define efi_get_smbios_string(__record, __type, __name) ({ \
int off = offsetof(struct efi_smbios_type ## __type ## _record, \
__name); \
- __efi_get_smbios_string(__type, off, size); \
+ __efi_get_smbios_string((__record), __type, off); \
})
-const u8 *__efi_get_smbios_string(u8 type, int offset, int recsize);
+const u8 *__efi_get_smbios_string(const struct efi_smbios_record *record,
+ u8 type, int offset);
void efi_remap_image(unsigned long image_base, unsigned alloc_size,
unsigned long code_size);
diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c
index 1692d19..32c7a54 100644
--- a/drivers/firmware/efi/libstub/randomalloc.c
+++ b/drivers/firmware/efi/libstub/randomalloc.c
@@ -101,6 +101,7 @@ efi_status_t efi_random_alloc(unsigned long size,
* to calculate the randomly chosen address, and allocate it directly
* using EFI_ALLOCATE_ADDRESS.
*/
+ status = EFI_OUT_OF_RESOURCES;
for (map_offset = 0; map_offset < map->map_size; map_offset += map->desc_size) {
efi_memory_desc_t *md = (void *)map->map + map_offset;
efi_physical_addr_t target;
diff --git a/drivers/firmware/efi/libstub/screen_info.c b/drivers/firmware/efi/libstub/screen_info.c
index 8e76a8b..4be1c4d 100644
--- a/drivers/firmware/efi/libstub/screen_info.c
+++ b/drivers/firmware/efi/libstub/screen_info.c
@@ -15,18 +15,11 @@
* early, but it only works if the EFI stub is part of the core kernel image
* itself. The zboot decompressor can only use the configuration table
* approach.
- *
- * In order to support both methods from the same build of the EFI stub
- * library, provide this dummy global definition of struct screen_info. If it
- * is required to satisfy a link dependency, it means we need to override the
- * __weak alloc and free methods with the ones below, and those will be pulled
- * in as well.
*/
-struct screen_info screen_info;
static efi_guid_t screen_info_guid = LINUX_EFI_SCREEN_INFO_TABLE_GUID;
-struct screen_info *alloc_screen_info(void)
+struct screen_info *__alloc_screen_info(void)
{
struct screen_info *si;
efi_status_t status;
diff --git a/drivers/firmware/efi/libstub/smbios.c b/drivers/firmware/efi/libstub/smbios.c
index 460418b7..c217de2 100644
--- a/drivers/firmware/efi/libstub/smbios.c
+++ b/drivers/firmware/efi/libstub/smbios.c
@@ -22,21 +22,30 @@ struct efi_smbios_protocol {
u8 minor_version;
};
-const u8 *__efi_get_smbios_string(u8 type, int offset, int recsize)
+const struct efi_smbios_record *efi_get_smbios_record(u8 type)
{
struct efi_smbios_record *record;
efi_smbios_protocol_t *smbios;
efi_status_t status;
u16 handle = 0xfffe;
- const u8 *strtable;
status = efi_bs_call(locate_protocol, &EFI_SMBIOS_PROTOCOL_GUID, NULL,
(void **)&smbios) ?:
efi_call_proto(smbios, get_next, &handle, &type, &record, NULL);
if (status != EFI_SUCCESS)
return NULL;
+ return record;
+}
- strtable = (u8 *)record + recsize;
+const u8 *__efi_get_smbios_string(const struct efi_smbios_record *record,
+ u8 type, int offset)
+{
+ const u8 *strtable;
+
+ if (!record)
+ return NULL;
+
+ strtable = (u8 *)record + record->length;
for (int i = 1; i < ((u8 *)record)[offset]; i++) {
int len = strlen(strtable);
diff --git a/drivers/firmware/efi/libstub/zboot-header.S b/drivers/firmware/efi/libstub/zboot-header.S
index ec4525d..445cb646 100644
--- a/drivers/firmware/efi/libstub/zboot-header.S
+++ b/drivers/firmware/efi/libstub/zboot-header.S
@@ -63,7 +63,7 @@
.long .Lefi_header_end - .Ldoshdr
.long 0
.short IMAGE_SUBSYSTEM_EFI_APPLICATION
- .short 0
+ .short IMAGE_DLL_CHARACTERISTICS_NX_COMPAT
#ifdef CONFIG_64BIT
.quad 0, 0, 0, 0
#else
diff --git a/drivers/firmware/efi/libstub/zboot.c b/drivers/firmware/efi/libstub/zboot.c
index ba234e0..6105e5e 100644
--- a/drivers/firmware/efi/libstub/zboot.c
+++ b/drivers/firmware/efi/libstub/zboot.c
@@ -57,6 +57,11 @@ void __weak efi_cache_sync_image(unsigned long image_base,
// executable code loaded into memory to be safe for execution.
}
+struct screen_info *alloc_screen_info(void)
+{
+ return __alloc_screen_info();
+}
+
asmlinkage efi_status_t __efiapi
efi_zboot_entry(efi_handle_t handle, efi_system_table_t *systab)
{
diff --git a/drivers/firmware/efi/sysfb_efi.c b/drivers/firmware/efi/sysfb_efi.c
index f06fdac..456d0e5 100644
--- a/drivers/firmware/efi/sysfb_efi.c
+++ b/drivers/firmware/efi/sysfb_efi.c
@@ -272,6 +272,14 @@ static const struct dmi_system_id efifb_dmi_swap_width_height[] __initconst = {
"IdeaPad Duet 3 10IGL5"),
},
},
+ {
+ /* Lenovo Yoga Book X91F / X91L */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ /* Non exact match to match F + L versions */
+ DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X91"),
+ },
+ },
{},
};
@@ -341,7 +349,7 @@ static const struct fwnode_operations efifb_fwnode_ops = {
#ifdef CONFIG_EFI
static struct fwnode_handle efifb_fwnode;
-__init void sysfb_apply_efi_quirks(struct platform_device *pd)
+__init void sysfb_apply_efi_quirks(void)
{
if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI ||
!(screen_info.capabilities & VIDEO_CAPABILITY_SKIP_QUIRKS))
@@ -355,7 +363,10 @@ __init void sysfb_apply_efi_quirks(struct platform_device *pd)
screen_info.lfb_height = temp;
screen_info.lfb_linelength = 4 * screen_info.lfb_width;
}
+}
+__init void sysfb_set_efifb_fwnode(struct platform_device *pd)
+{
if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI && IS_ENABLED(CONFIG_PCI)) {
fwnode_init(&efifb_fwnode, &efifb_fwnode_ops);
pd->dev.fwnode = &efifb_fwnode;
diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c
index 468d4d5..b1e11f85 100644
--- a/drivers/firmware/qcom_scm.c
+++ b/drivers/firmware/qcom_scm.c
@@ -1479,7 +1479,7 @@ static int qcom_scm_probe(struct platform_device *pdev)
init_completion(&__scm->waitq_comp);
- irq = platform_get_irq(pdev, 0);
+ irq = platform_get_irq_optional(pdev, 0);
if (irq < 0) {
if (irq != -ENXIO)
return irq;
diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c
index 3fd3563..3c197db 100644
--- a/drivers/firmware/sysfb.c
+++ b/drivers/firmware/sysfb.c
@@ -81,6 +81,8 @@ static __init int sysfb_init(void)
if (disabled)
goto unlock_mutex;
+ sysfb_apply_efi_quirks();
+
/* try to create a simple-framebuffer device */
compatible = sysfb_parse_mode(si, &mode);
if (compatible) {
@@ -107,7 +109,7 @@ static __init int sysfb_init(void)
goto unlock_mutex;
}
- sysfb_apply_efi_quirks(pd);
+ sysfb_set_efifb_fwnode(pd);
ret = platform_device_add_data(pd, si, sizeof(*si));
if (ret)
diff --git a/drivers/firmware/sysfb_simplefb.c b/drivers/firmware/sysfb_simplefb.c
index ce9c007..82c64cb 100644
--- a/drivers/firmware/sysfb_simplefb.c
+++ b/drivers/firmware/sysfb_simplefb.c
@@ -141,7 +141,7 @@ __init struct platform_device *sysfb_create_simplefb(const struct screen_info *s
if (!pd)
return ERR_PTR(-ENOMEM);
- sysfb_apply_efi_quirks(pd);
+ sysfb_set_efifb_fwnode(pd);
ret = platform_device_add_resources(pd, &res, 1);
if (ret)
diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
index acd83d2..ce86a18 100644
--- a/drivers/firmware/xilinx/zynqmp.c
+++ b/drivers/firmware/xilinx/zynqmp.c
@@ -206,7 +206,7 @@ static int do_feature_check_call(const u32 api_id)
}
/* Add new entry if not present */
- feature_data = kmalloc(sizeof(*feature_data), GFP_KERNEL);
+ feature_data = kmalloc(sizeof(*feature_data), GFP_ATOMIC);
if (!feature_data)
return -ENOMEM;
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index d8a421c..31ae0ad 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -536,6 +536,9 @@ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip)
if (ACPI_FAILURE(status))
return;
+ if (acpi_quirk_skip_gpio_event_handlers())
+ return;
+
acpi_walk_resources(handle, METHOD_NAME__AEI,
acpi_gpiochip_alloc_event, acpi_gpio);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 164141b..39018f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1272,6 +1272,7 @@ void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
int amdgpu_device_pci_reset(struct amdgpu_device *adev);
bool amdgpu_device_need_post(struct amdgpu_device *adev);
bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
+bool amdgpu_device_aspm_support_quirk(void);
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
u64 num_vis_bytes);
@@ -1391,10 +1392,12 @@ int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_sta
int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);
void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps);
+bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev);
void amdgpu_acpi_detect(void);
#else
static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; }
static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
+static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; }
static inline void amdgpu_acpi_detect(void) { }
static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; }
static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
@@ -1405,11 +1408,9 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev,
#if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev);
-bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev);
bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
#else
static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; }
-static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; }
static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; }
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index d4196fc..60b1857 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -971,6 +971,29 @@ static bool amdgpu_atcs_pci_probe_handle(struct pci_dev *pdev)
return true;
}
+
+/**
+ * amdgpu_acpi_should_gpu_reset
+ *
+ * @adev: amdgpu_device_pointer
+ *
+ * returns true if should reset GPU, false if not
+ */
+bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev)
+{
+ if (adev->flags & AMD_IS_APU)
+ return false;
+
+ if (amdgpu_sriov_vf(adev))
+ return false;
+
+#if IS_ENABLED(CONFIG_SUSPEND)
+ return pm_suspend_target_state != PM_SUSPEND_TO_IDLE;
+#else
+ return true;
+#endif
+}
+
/*
* amdgpu_acpi_detect - detect ACPI ATIF/ATCS methods
*
@@ -1043,24 +1066,6 @@ bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev)
}
/**
- * amdgpu_acpi_should_gpu_reset
- *
- * @adev: amdgpu_device_pointer
- *
- * returns true if should reset GPU, false if not
- */
-bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev)
-{
- if (adev->flags & AMD_IS_APU)
- return false;
-
- if (amdgpu_sriov_vf(adev))
- return false;
-
- return pm_suspend_target_state != PM_SUSPEND_TO_IDLE;
-}
-
-/**
* amdgpu_acpi_is_s0ix_active
*
* @adev: amdgpu_device_pointer
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index c4a4e2f..3d98fc2a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -80,6 +80,10 @@
#include <drm/drm_drv.h>
+#if IS_ENABLED(CONFIG_X86)
+#include <asm/intel-family.h>
+#endif
+
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -1356,6 +1360,17 @@ bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
return pcie_aspm_enabled(adev->pdev);
}
+bool amdgpu_device_aspm_support_quirk(void)
+{
+#if IS_ENABLED(CONFIG_X86)
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
+#else
+ return true;
+#endif
+}
+
/* if we get transitioned to only one device, take VGA back */
/**
* amdgpu_device_vga_set_decode - enable/disable vga decode
@@ -4145,8 +4160,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
DRM_WARN("smart shift update failed\n");
- drm_kms_helper_poll_disable(dev);
-
if (fbcon)
drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
@@ -4243,8 +4256,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
if (fbcon)
drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
- drm_kms_helper_poll_enable(dev);
-
amdgpu_ras_resume(adev);
if (adev->mode_info.num_crtc) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 503f89a..d60fe7e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -1618,6 +1618,8 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
struct drm_connector_list_iter iter;
int r;
+ drm_kms_helper_poll_disable(dev);
+
/* turn off display hw */
drm_modeset_lock_all(dev);
drm_connector_list_iter_begin(dev, &iter);
@@ -1694,6 +1696,8 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev)
drm_modeset_unlock_all(dev);
+ drm_kms_helper_poll_enable(dev);
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index f5ffca2..ba5def3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2467,7 +2467,10 @@ static int amdgpu_pmops_freeze(struct device *dev)
adev->in_s4 = false;
if (r)
return r;
- return amdgpu_asic_reset(adev);
+
+ if (amdgpu_acpi_should_gpu_reset(adev))
+ return amdgpu_asic_reset(adev);
+ return 0;
}
static int amdgpu_pmops_thaw(struct device *dev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index faff4a3..f52d0ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -678,6 +678,15 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
ptr = &ring->fence_drv.fences[i];
old = rcu_dereference_protected(*ptr, 1);
if (old && old->ops == &amdgpu_job_fence_ops) {
+			struct amdgpu_job *job;
+
+			struct amdgpu_job *job;
+ job = container_of(old, struct amdgpu_job, hw_fence);
+ if (!job->base.s_fence && !dma_fence_is_signaled(old))
+ dma_fence_signal(old);
RCU_INIT_POINTER(*ptr, NULL);
dma_fence_put(old);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 25217b0..e7974de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -26,6 +26,7 @@
#include <linux/firmware.h>
#include <linux/module.h>
+#include <linux/dmi.h>
#include <linux/pci.h>
#include <linux/debugfs.h>
#include <drm/drm_drv.h>
@@ -114,6 +115,24 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
adev->vcn.indirect_sram = true;
+ /*
+ * Some Steam Deck's BIOS versions are incompatible with the
+ * indirect SRAM mode, leading to amdgpu being unable to get
+ * properly probed (and even potentially crashing the kernel).
+ * Hence, check for these versions here - notice this is
+ * restricted to Vangogh (Deck's APU).
+ */
+ if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 0, 2)) {
+ const char *bios_ver = dmi_get_system_info(DMI_BIOS_VERSION);
+
+ if (bios_ver && (!strncmp("F7A0113", bios_ver, 7) ||
+ !strncmp("F7A0114", bios_ver, 7))) {
+ adev->vcn.indirect_sram = false;
+ dev_info(adev->dev,
+ "Steam Deck quirk: indirect SRAM disabled on BIOS %s\n", bios_ver);
+ }
+ }
+
hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index b9e9480..4f7bab5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -124,6 +124,8 @@ enum AMDGIM_FEATURE_FLAG {
AMDGIM_FEATURE_PP_ONE_VF = (1 << 4),
/* Indirect Reg Access enabled */
AMDGIM_FEATURE_INDIRECT_REG_ACCESS = (1 << 5),
+ /* AV1 Support MODE*/
+ AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6),
};
enum AMDGIM_REG_ACCESS_FLAG {
@@ -322,6 +324,8 @@ static inline bool is_virtual_machine(void)
((!amdgpu_in_reset(adev)) && adev->virt.tdr_debug)
#define amdgpu_sriov_is_normal(adev) \
((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug))
+#define amdgpu_sriov_is_av1_support(adev) \
+ ((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT)
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
void amdgpu_virt_init_setting(struct amdgpu_device *adev);
void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index 6c97148..24d42d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -93,7 +93,8 @@ union amd_sriov_msg_feature_flags {
uint32_t mm_bw_management : 1;
uint32_t pp_one_vf_mode : 1;
uint32_t reg_indirect_acc : 1;
- uint32_t reserved : 26;
+ uint32_t av1_support : 1;
+ uint32_t reserved : 25;
} flags;
uint32_t all;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 3bf697a..ecf8ceb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1287,6 +1287,11 @@ static int gfx_v11_0_sw_init(void *handle)
break;
}
+ /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3) &&
+ amdgpu_sriov_is_pp_one_vf(adev))
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG;
+
/* EOP Event */
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
@@ -4655,6 +4660,14 @@ static bool gfx_v11_0_check_soft_reset(void *handle)
return false;
}
+static int gfx_v11_0_post_soft_reset(void *handle)
+{
+ /**
+ * GFX soft reset will impact MES, need resume MES when do GFX soft reset
+ */
+ return amdgpu_mes_resume((struct amdgpu_device *)handle);
+}
+
static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
uint64_t clock;
@@ -6166,6 +6179,7 @@ static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
.wait_for_idle = gfx_v11_0_wait_for_idle,
.soft_reset = gfx_v11_0_soft_reset,
.check_soft_reset = gfx_v11_0_check_soft_reset,
+ .post_soft_reset = gfx_v11_0_post_soft_reset,
.set_clockgating_state = gfx_v11_0_set_clockgating_state,
.set_powergating_state = gfx_v11_0_set_powergating_state,
.get_clockgating_state = gfx_v11_0_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 855d390..ebe0e2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -578,7 +578,7 @@ static void nv_pcie_gen3_enable(struct amdgpu_device *adev)
static void nv_program_aspm(struct amdgpu_device *adev)
{
- if (!amdgpu_device_should_use_aspm(adev))
+ if (!amdgpu_device_should_use_aspm(adev) || !amdgpu_device_aspm_support_quirk())
return;
if (!(adev->flags & AMD_IS_APU) &&
@@ -1055,8 +1055,8 @@ static int nv_common_late_init(void *handle)
amdgpu_virt_update_sriov_video_codec(adev,
sriov_sc_video_codecs_encode_array,
ARRAY_SIZE(sriov_sc_video_codecs_encode_array),
- sriov_sc_video_codecs_decode_array_vcn1,
- ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn1));
+ sriov_sc_video_codecs_decode_array_vcn0,
+ ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn0));
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 061793d..c82b3a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -102,6 +102,59 @@ static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_decode_vcn1 =
.codec_array = vcn_4_0_0_video_codecs_decode_array_vcn1,
};
+/* SRIOV SOC21, not const since data is controlled by host */
+static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_encode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_encode_array_vcn1[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+};
+
+static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn0 = {
+ .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn0),
+ .codec_array = sriov_vcn_4_0_0_video_codecs_encode_array_vcn0,
+};
+
+static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn1 = {
+ .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn1),
+ .codec_array = sriov_vcn_4_0_0_video_codecs_encode_array_vcn1,
+};
+
+static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn1[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+};
+
+static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_decode_vcn0 = {
+ .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn0),
+ .codec_array = sriov_vcn_4_0_0_video_codecs_decode_array_vcn0,
+};
+
+static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_decode_vcn1 = {
+ .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn1),
+ .codec_array = sriov_vcn_4_0_0_video_codecs_decode_array_vcn1,
+};
+
static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode,
const struct amdgpu_video_codecs **codecs)
{
@@ -112,16 +165,31 @@ static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode,
case IP_VERSION(4, 0, 0):
case IP_VERSION(4, 0, 2):
case IP_VERSION(4, 0, 4):
- if (adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) {
- if (encode)
- *codecs = &vcn_4_0_0_video_codecs_encode_vcn1;
- else
- *codecs = &vcn_4_0_0_video_codecs_decode_vcn1;
+ if (amdgpu_sriov_vf(adev)) {
+ if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) ||
+ !amdgpu_sriov_is_av1_support(adev)) {
+ if (encode)
+ *codecs = &sriov_vcn_4_0_0_video_codecs_encode_vcn1;
+ else
+ *codecs = &sriov_vcn_4_0_0_video_codecs_decode_vcn1;
+ } else {
+ if (encode)
+ *codecs = &sriov_vcn_4_0_0_video_codecs_encode_vcn0;
+ else
+ *codecs = &sriov_vcn_4_0_0_video_codecs_decode_vcn0;
+ }
} else {
- if (encode)
- *codecs = &vcn_4_0_0_video_codecs_encode_vcn0;
- else
- *codecs = &vcn_4_0_0_video_codecs_decode_vcn0;
+ if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)) {
+ if (encode)
+ *codecs = &vcn_4_0_0_video_codecs_encode_vcn1;
+ else
+ *codecs = &vcn_4_0_0_video_codecs_decode_vcn1;
+ } else {
+ if (encode)
+ *codecs = &vcn_4_0_0_video_codecs_encode_vcn0;
+ else
+ *codecs = &vcn_4_0_0_video_codecs_decode_vcn0;
+ }
}
return 0;
default:
@@ -730,8 +798,23 @@ static int soc21_common_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev)) {
xgpu_nv_mailbox_get_irq(adev);
+ if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) ||
+ !amdgpu_sriov_is_av1_support(adev)) {
+ amdgpu_virt_update_sriov_video_codec(adev,
+ sriov_vcn_4_0_0_video_codecs_encode_array_vcn1,
+ ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn1),
+ sriov_vcn_4_0_0_video_codecs_decode_array_vcn1,
+ ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn1));
+ } else {
+ amdgpu_virt_update_sriov_video_codec(adev,
+ sriov_vcn_4_0_0_video_codecs_encode_array_vcn0,
+ ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn0),
+ sriov_vcn_4_0_0_video_codecs_decode_array_vcn0,
+ ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn0));
+ }
+ }
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 12ef782..ceab878 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -81,10 +81,6 @@
#include "mxgpu_vi.h"
#include "amdgpu_dm.h"
-#if IS_ENABLED(CONFIG_X86)
-#include <asm/intel-family.h>
-#endif
-
#define ixPCIE_LC_L1_PM_SUBSTATE 0x100100C6
#define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK 0x00000001L
#define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK 0x00000002L
@@ -1138,24 +1134,13 @@ static void vi_enable_aspm(struct amdgpu_device *adev)
WREG32_PCIE(ixPCIE_LC_CNTL, data);
}
-static bool aspm_support_quirk_check(void)
-{
-#if IS_ENABLED(CONFIG_X86)
- struct cpuinfo_x86 *c = &cpu_data(0);
-
- return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
-#else
- return true;
-#endif
-}
-
static void vi_program_aspm(struct amdgpu_device *adev)
{
u32 data, data1, orig;
bool bL1SS = false;
bool bClkReqSupport = true;
- if (!amdgpu_device_should_use_aspm(adev) || !aspm_support_quirk_check())
+ if (!amdgpu_device_should_use_aspm(adev) || !amdgpu_device_aspm_support_quirk())
return;
if (adev->flags & AMD_IS_APU ||
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index a0e30f2..de310ed 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1312,14 +1312,14 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
args->n_success = i+1;
}
- mutex_unlock(&p->mutex);
-
err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true);
if (err) {
pr_debug("Sync memory failed, wait interrupted by user signal\n");
goto sync_memory_failed;
}
+ mutex_unlock(&p->mutex);
+
/* Flush TLBs after waiting for the page table updates to complete */
for (i = 0; i < args->n_devices; i++) {
peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
@@ -1335,9 +1335,9 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
map_memory_to_gpu_failed:
+sync_memory_failed:
mutex_unlock(&p->mutex);
copy_from_user_failed:
-sync_memory_failed:
kfree(devices_arr);
return err;
@@ -1351,6 +1351,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
void *mem;
long err = 0;
uint32_t *devices_arr = NULL, i;
+ bool flush_tlb;
if (!args->n_devices) {
pr_debug("Device IDs array empty\n");
@@ -1403,16 +1404,19 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
}
args->n_success = i+1;
}
- mutex_unlock(&p->mutex);
- if (kfd_flush_tlb_after_unmap(pdd->dev)) {
+ flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev);
+ if (flush_tlb) {
err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,
(struct kgd_mem *) mem, true);
if (err) {
pr_debug("Sync memory failed, wait interrupted by user signal\n");
goto sync_memory_failed;
}
+ }
+ mutex_unlock(&p->mutex);
+ if (flush_tlb) {
/* Flush TLBs after waiting for the page table updates to complete */
for (i = 0; i < args->n_devices; i++) {
peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
@@ -1428,9 +1432,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
unmap_memory_from_gpu_failed:
+sync_memory_failed:
mutex_unlock(&p->mutex);
copy_from_user_failed:
-sync_memory_failed:
kfree(devices_arr);
return err;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 3de7f61..ec70a16 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -59,6 +59,7 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
unsigned int chunk_size);
static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
+static int kfd_resume_iommu(struct kfd_dev *kfd);
static int kfd_resume(struct kfd_dev *kfd);
static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
@@ -624,7 +625,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
svm_migrate_init(kfd->adev);
- if (kgd2kfd_resume_iommu(kfd))
+ if (kfd_resume_iommu(kfd))
goto device_iommu_error;
if (kfd_resume(kfd))
@@ -773,6 +774,14 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
int kgd2kfd_resume_iommu(struct kfd_dev *kfd)
{
+ if (!kfd->init_complete)
+ return 0;
+
+ return kfd_resume_iommu(kfd);
+}
+
+static int kfd_resume_iommu(struct kfd_dev *kfd)
+{
int err = 0;
err = kfd_iommu_resume(kfd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index de8ce72..5493390 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -289,7 +289,7 @@ static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)
static int
svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct migrate_vma *migrate, struct dma_fence **mfence,
- dma_addr_t *scratch)
+ dma_addr_t *scratch, uint64_t ttm_res_offset)
{
uint64_t npages = migrate->npages;
struct device *dev = adev->dev;
@@ -299,19 +299,13 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
uint64_t i, j;
int r;
- pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
- prange->last);
+ pr_debug("svms 0x%p [0x%lx 0x%lx 0x%llx]\n", prange->svms, prange->start,
+ prange->last, ttm_res_offset);
src = scratch;
dst = (uint64_t *)(scratch + npages);
- r = svm_range_vram_node_new(adev, prange, true);
- if (r) {
- dev_dbg(adev->dev, "fail %d to alloc vram\n", r);
- goto out;
- }
-
- amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT,
+ amdgpu_res_first(prange->ttm_res, ttm_res_offset,
npages << PAGE_SHIFT, &cursor);
for (i = j = 0; i < npages; i++) {
struct page *spage;
@@ -391,14 +385,14 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
migrate->dst[i + 3] = 0;
}
#endif
-out:
+
return r;
}
static long
svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start,
- uint64_t end, uint32_t trigger)
+ uint64_t end, uint32_t trigger, uint64_t ttm_res_offset)
{
struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
uint64_t npages = (end - start) >> PAGE_SHIFT;
@@ -451,7 +445,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
else
pr_debug("0x%lx pages migrated\n", cpages);
- r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch);
+ r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch, ttm_res_offset);
migrate_vma_pages(&migrate);
pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
@@ -499,6 +493,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
unsigned long addr, start, end;
struct vm_area_struct *vma;
struct amdgpu_device *adev;
+ uint64_t ttm_res_offset;
unsigned long cpages = 0;
long r = 0;
@@ -520,6 +515,13 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
start = prange->start << PAGE_SHIFT;
end = (prange->last + 1) << PAGE_SHIFT;
+ r = svm_range_vram_node_new(adev, prange, true);
+ if (r) {
+ dev_dbg(adev->dev, "fail %ld to alloc vram\n", r);
+ return r;
+ }
+ ttm_res_offset = prange->offset << PAGE_SHIFT;
+
for (addr = start; addr < end;) {
unsigned long next;
@@ -528,18 +530,21 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
break;
next = min(vma->vm_end, end);
- r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger);
+ r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger, ttm_res_offset);
if (r < 0) {
pr_debug("failed %ld to migrate\n", r);
break;
} else {
cpages += r;
}
+ ttm_res_offset += next - addr;
addr = next;
}
if (cpages)
prange->actual_loc = best_loc;
+ else
+ svm_range_vram_node_free(prange);
return r < 0 ? r : 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 09b966d..aee2212 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -77,6 +77,7 @@ static int kfd_init(void)
static void kfd_exit(void)
{
+ kfd_cleanup_processes();
kfd_debugfs_fini();
kfd_process_destroy_wq();
kfd_procfs_shutdown();
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index bfa30d1..7e4d992 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -928,6 +928,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev);
int kfd_process_create_wq(void);
void kfd_process_destroy_wq(void);
+void kfd_cleanup_processes(void);
struct kfd_process *kfd_create_process(struct file *filep);
struct kfd_process *kfd_get_process(const struct task_struct *task);
struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 7acd55a..4208e0f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1167,6 +1167,17 @@ static void kfd_process_free_notifier(struct mmu_notifier *mn)
kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
}
+static void kfd_process_notifier_release_internal(struct kfd_process *p)
+{
+ cancel_delayed_work_sync(&p->eviction_work);
+ cancel_delayed_work_sync(&p->restore_work);
+
+ /* Indicate to other users that MM is no longer valid */
+ p->mm = NULL;
+
+ mmu_notifier_put(&p->mmu_notifier);
+}
+
static void kfd_process_notifier_release(struct mmu_notifier *mn,
struct mm_struct *mm)
{
@@ -1181,17 +1192,22 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
return;
mutex_lock(&kfd_processes_mutex);
+ /*
+ * Do early return if table is empty.
+ *
+ * This could potentially happen if this function is called concurrently
+ * by mmu_notifier and by kfd_cleanup_pocesses.
+ *
+ */
+ if (hash_empty(kfd_processes_table)) {
+ mutex_unlock(&kfd_processes_mutex);
+ return;
+ }
hash_del_rcu(&p->kfd_processes);
mutex_unlock(&kfd_processes_mutex);
synchronize_srcu(&kfd_processes_srcu);
- cancel_delayed_work_sync(&p->eviction_work);
- cancel_delayed_work_sync(&p->restore_work);
-
- /* Indicate to other users that MM is no longer valid */
- p->mm = NULL;
-
- mmu_notifier_put(&p->mmu_notifier);
+ kfd_process_notifier_release_internal(p);
}
static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
@@ -1200,6 +1216,43 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
.free_notifier = kfd_process_free_notifier,
};
+/*
+ * This code handles the case when driver is being unloaded before all
+ * mm_struct are released. We need to safely free the kfd_process and
+ * avoid race conditions with mmu_notifier that might try to free them.
+ *
+ */
+void kfd_cleanup_processes(void)
+{
+ struct kfd_process *p;
+ struct hlist_node *p_temp;
+ unsigned int temp;
+ HLIST_HEAD(cleanup_list);
+
+ /*
+ * Move all remaining kfd_process from the process table to a
+ * temp list for processing. Once done, callback from mmu_notifier
+ * release will not see the kfd_process in the table and do early return,
+ * avoiding double free issues.
+ */
+ mutex_lock(&kfd_processes_mutex);
+ hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) {
+ hash_del_rcu(&p->kfd_processes);
+ synchronize_srcu(&kfd_processes_srcu);
+ hlist_add_head(&p->kfd_processes, &cleanup_list);
+ }
+ mutex_unlock(&kfd_processes_mutex);
+
+ hlist_for_each_entry_safe(p, p_temp, &cleanup_list, kfd_processes)
+ kfd_process_notifier_release_internal(p);
+
+ /*
+ * Ensures that all outstanding free_notifier get called, triggering
+ * the release of the kfd_process struct.
+ */
+ mmu_notifier_synchronize();
+}
+
static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
{
unsigned long offset;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 5137476..4236539 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -218,8 +218,8 @@ static int init_user_queue(struct process_queue_manager *pqm,
return 0;
cleanup:
- if (dev->shared_resources.enable_mes)
- uninit_queue(*q);
+ uninit_queue(*q);
+ *q = NULL;
return retval;
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 009ef91..a01fd41 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5105,9 +5105,9 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
for (; flip_addrs->dirty_rect_count < num_clips; clips++)
fill_dc_dirty_rect(new_plane_state->plane,
- &dirty_rects[i], clips->x1,
- clips->y1, clips->x2 - clips->x1,
- clips->y2 - clips->y1,
+ &dirty_rects[flip_addrs->dirty_rect_count],
+ clips->x1, clips->y1,
+ clips->x2 - clips->x1, clips->y2 - clips->y1,
&flip_addrs->dirty_rect_count,
false);
return;
@@ -7244,7 +7244,6 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
if (!aconnector->mst_root)
drm_connector_attach_max_bpc_property(&aconnector->base, 8, 16);
- /* This defaults to the max in the range, but we want 8bpc for non-edp. */
aconnector->base.state->max_bpc = 16;
aconnector->base.state->max_requested_bpc = aconnector->base.state->max_bpc;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
index 8e572f0..4abfd2c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
@@ -561,7 +561,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config)
link->dp.mst_enabled = config->mst_enabled;
link->dp.usb4_enabled = config->usb4_enabled;
display->adjust.disable = MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION;
- link->adjust.auth_delay = 0;
+ link->adjust.auth_delay = 2;
link->adjust.hdcp1.disable = 0;
conn_state = aconnector->base.state;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
index 3b4d4d6..df787fc 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
@@ -998,8 +998,5 @@ void dcn30_prepare_bandwidth(struct dc *dc,
dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz);
dcn20_prepare_bandwidth(dc, context);
-
- dc_dmub_srv_p_state_delegate(dc,
- context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching, context);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
index e4472c6..3fb4bcc 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
@@ -271,8 +271,7 @@ static void dccg32_set_dpstreamclk(
dccg32_set_dtbclk_p_src(dccg, src, otg_inst);
/* enabled to select one of the DTBCLKs for pipe */
- switch (otg_inst)
- {
+ switch (dp_hpo_inst) {
case 0:
REG_UPDATE_2(DPSTREAMCLK_CNTL,
DPSTREAMCLK0_EN,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
index 16f8921..9d14045 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
@@ -1104,7 +1104,7 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign
*k2_div = PIXEL_RATE_DIV_BY_2;
else
*k2_div = PIXEL_RATE_DIV_BY_4;
- } else if (dc_is_dp_signal(stream->signal) || dc_is_virtual_signal(stream->signal)) {
+ } else if (dc_is_dp_signal(stream->signal)) {
if (two_pix_per_container) {
*k1_div = PIXEL_RATE_DIV_BY_1;
*k2_div = PIXEL_RATE_DIV_BY_2;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index 74e50c0..4b7abb4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1915,6 +1915,7 @@ int dcn32_populate_dml_pipes_from_context(
bool subvp_in_use = false;
uint8_t is_pipe_split_expected[MAX_PIPES] = {0};
struct dc_crtc_timing *timing;
+ bool vsr_odm_support = false;
dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
@@ -1932,12 +1933,15 @@ int dcn32_populate_dml_pipes_from_context(
timing = &pipe->stream->timing;
pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal;
+ vsr_odm_support = (res_ctx->pipe_ctx[i].stream->src.width >= 5120 &&
+ res_ctx->pipe_ctx[i].stream->src.width > res_ctx->pipe_ctx[i].stream->dst.width);
if (context->stream_count == 1 &&
context->stream_status[0].plane_count == 1 &&
!dc_is_hdmi_signal(res_ctx->pipe_ctx[i].stream->signal) &&
is_h_timing_divisible_by_2(res_ctx->pipe_ctx[i].stream) &&
pipe->stream->timing.pix_clk_100hz * 100 > DCN3_2_VMIN_DISPCLK_HZ &&
- dc->debug.enable_single_display_2to1_odm_policy) {
+ dc->debug.enable_single_display_2to1_odm_policy &&
+ !vsr_odm_support) { //excluding 2to1 ODM combine on >= 5k vsr
pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1;
}
pipe_cnt++;
@@ -2182,6 +2186,7 @@ static bool dcn32_resource_construct(
dc->caps.edp_dsc_support = true;
dc->caps.extended_aux_timeout_support = true;
dc->caps.dmcub_support = true;
+ dc->caps.seamless_odm = true;
/* Color pipeline capabilities */
dc->caps.color.dpp.dcn_arch = 1;
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
index 38216c7..f70025e 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
@@ -855,6 +855,7 @@ static bool detect_link_and_local_sink(struct dc_link *link,
struct dc_sink *prev_sink = NULL;
struct dpcd_caps prev_dpcd_caps;
enum dc_connection_type new_connection_type = dc_connection_none;
+ enum dc_connection_type pre_connection_type = link->type;
const uint32_t post_oui_delay = 30; // 30ms
DC_LOGGER_INIT(link->ctx->logger);
@@ -957,6 +958,8 @@ static bool detect_link_and_local_sink(struct dc_link *link,
}
if (!detect_dp(link, &sink_caps, reason)) {
+ link->type = pre_connection_type;
+
if (prev_sink)
dc_sink_release(prev_sink);
return false;
@@ -1244,11 +1247,16 @@ bool link_detect(struct dc_link *link, enum dc_detect_reason reason)
bool is_delegated_to_mst_top_mgr = false;
enum dc_connection_type pre_link_type = link->type;
+ DC_LOGGER_INIT(link->ctx->logger);
+
is_local_sink_detect_success = detect_link_and_local_sink(link, reason);
if (is_local_sink_detect_success && link->local_sink)
verify_link_capability(link, link->local_sink, reason);
+ DC_LOG_DC("%s: link_index=%d is_local_sink_detect_success=%d pre_link_type=%d link_type=%d\n", __func__,
+ link->link_index, is_local_sink_detect_success, pre_link_type, link->type);
+
if (is_local_sink_detect_success && link->local_sink &&
dc_is_dp_signal(link->local_sink->sink_signal) &&
link->dpcd_caps.is_mst_capable)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
index f774017..2162ecd 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
@@ -27,7 +27,7 @@
// *** IMPORTANT ***
// SMU TEAM: Always increment the interface version if
// any structure is changed in this file
-#define PMFW_DRIVER_IF_VERSION 7
+#define PMFW_DRIVER_IF_VERSION 8
typedef struct {
int32_t value;
@@ -198,7 +198,7 @@ typedef struct {
uint16_t SkinTemp;
uint16_t DeviceState;
uint16_t CurTemp; //[centi-Celsius]
- uint16_t spare2;
+ uint16_t FilterAlphaValue;
uint16_t AverageGfxclkFrequency;
uint16_t AverageFclkFrequency;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 1c0ae2c..f085cb9 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -29,7 +29,7 @@
#define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04
#define SMU13_DRIVER_IF_VERSION_ALDE 0x08
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0 0x37
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x08
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10 0x32
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x37
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 697e98a..75f1868 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -2143,16 +2143,9 @@ static int sienna_cichlid_set_default_od_settings(struct smu_context *smu)
(OverDriveTable_t *)smu->smu_table.boot_overdrive_table;
OverDriveTable_t *user_od_table =
(OverDriveTable_t *)smu->smu_table.user_overdrive_table;
+ OverDriveTable_t user_od_table_bak;
int ret = 0;
- /*
- * For S3/S4/Runpm resume, no need to setup those overdrive tables again as
- * - either they already have the default OD settings got during cold bootup
- * - or they have some user customized OD settings which cannot be overwritten
- */
- if (smu->adev->in_suspend)
- return 0;
-
ret = smu_cmn_update_table(smu, SMU_TABLE_OVERDRIVE,
0, (void *)boot_od_table, false);
if (ret) {
@@ -2163,7 +2156,23 @@ static int sienna_cichlid_set_default_od_settings(struct smu_context *smu)
sienna_cichlid_dump_od_table(smu, boot_od_table);
memcpy(od_table, boot_od_table, sizeof(OverDriveTable_t));
- memcpy(user_od_table, boot_od_table, sizeof(OverDriveTable_t));
+
+ /*
+ * For S3/S4/Runpm resume, we need to setup those overdrive tables again,
+ * but we have to preserve user defined values in "user_od_table".
+ */
+ if (!smu->adev->in_suspend) {
+ memcpy(user_od_table, boot_od_table, sizeof(OverDriveTable_t));
+ smu->user_dpm_profile.user_od = false;
+ } else if (smu->user_dpm_profile.user_od) {
+ memcpy(&user_od_table_bak, user_od_table, sizeof(OverDriveTable_t));
+ memcpy(user_od_table, boot_od_table, sizeof(OverDriveTable_t));
+ user_od_table->GfxclkFmin = user_od_table_bak.GfxclkFmin;
+ user_od_table->GfxclkFmax = user_od_table_bak.GfxclkFmax;
+ user_od_table->UclkFmin = user_od_table_bak.UclkFmin;
+ user_od_table->UclkFmax = user_od_table_bak.UclkFmax;
+ user_od_table->VddGfxOffset = user_od_table_bak.VddGfxOffset;
+ }
return 0;
}
@@ -2373,6 +2382,20 @@ static int sienna_cichlid_od_edit_dpm_table(struct smu_context *smu,
return ret;
}
+static int sienna_cichlid_restore_user_od_settings(struct smu_context *smu)
+{
+ struct smu_table_context *table_context = &smu->smu_table;
+ OverDriveTable_t *od_table = table_context->overdrive_table;
+ OverDriveTable_t *user_od_table = table_context->user_overdrive_table;
+ int res;
+
+ res = smu_v11_0_restore_user_od_settings(smu);
+ if (res == 0)
+ memcpy(od_table, user_od_table, sizeof(OverDriveTable_t));
+
+ return res;
+}
+
static int sienna_cichlid_run_btc(struct smu_context *smu)
{
int res;
@@ -4400,7 +4423,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = {
.set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range,
.set_default_od_settings = sienna_cichlid_set_default_od_settings,
.od_edit_dpm_table = sienna_cichlid_od_edit_dpm_table,
- .restore_user_od_settings = smu_v11_0_restore_user_od_settings,
+ .restore_user_od_settings = sienna_cichlid_restore_user_od_settings,
.run_btc = sienna_cichlid_run_btc,
.set_power_source = smu_v11_0_set_power_source,
.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
diff --git a/drivers/gpu/drm/bridge/lontium-lt8912b.c b/drivers/gpu/drm/bridge/lontium-lt8912b.c
index 2019a81..b40bace 100644
--- a/drivers/gpu/drm/bridge/lontium-lt8912b.c
+++ b/drivers/gpu/drm/bridge/lontium-lt8912b.c
@@ -676,8 +676,8 @@ static int lt8912_parse_dt(struct lt8912 *lt)
lt->hdmi_port = of_drm_find_bridge(port_node);
if (!lt->hdmi_port) {
- dev_err(lt->dev, "%s: Failed to get hdmi port\n", __func__);
- ret = -ENODEV;
+ ret = -EPROBE_DEFER;
+ dev_err_probe(lt->dev, ret, "%s: Failed to get hdmi port\n", __func__);
goto err_free_host_node;
}
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 3d0a4da..261a62e 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -2796,7 +2796,7 @@ u32 drm_edid_get_panel_id(struct i2c_adapter *adapter)
* the EDID then we'll just return 0.
*/
- base_block = kmalloc(EDID_LENGTH, GFP_KERNEL);
+ base_block = kzalloc(EDID_LENGTH, GFP_KERNEL);
if (!base_block)
return 0;
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 7a3cb08..a5d392f 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -1388,10 +1388,13 @@ EXPORT_SYMBOL(drm_gem_lru_move_tail);
*
* @lru: The LRU to scan
* @nr_to_scan: The number of pages to try to reclaim
+ * @remaining: The number of pages left to reclaim, should be initialized by caller
* @shrink: Callback to try to shrink/reclaim the object.
*/
unsigned long
-drm_gem_lru_scan(struct drm_gem_lru *lru, unsigned nr_to_scan,
+drm_gem_lru_scan(struct drm_gem_lru *lru,
+ unsigned int nr_to_scan,
+ unsigned long *remaining,
bool (*shrink)(struct drm_gem_object *obj))
{
struct drm_gem_lru still_in_lru;
@@ -1430,8 +1433,10 @@ drm_gem_lru_scan(struct drm_gem_lru *lru, unsigned nr_to_scan,
* hit shrinker in response to trying to get backing pages
* for this obj (ie. while it's lock is already held)
*/
- if (!dma_resv_trylock(obj->resv))
+ if (!dma_resv_trylock(obj->resv)) {
+ *remaining += obj->size >> PAGE_SHIFT;
goto tail;
+ }
if (shrink(obj)) {
freed += obj->size >> PAGE_SHIFT;
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index 75185a96..2b2163c 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -619,11 +619,14 @@ int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct
int ret;
if (obj->import_attach) {
- /* Drop the reference drm_gem_mmap_obj() acquired.*/
- drm_gem_object_put(obj);
vma->vm_private_data = NULL;
+ ret = dma_buf_mmap(obj->dma_buf, vma, 0);
- return dma_buf_mmap(obj->dma_buf, vma, 0);
+ /* Drop the reference drm_gem_mmap_obj() acquired.*/
+ if (!ret)
+ drm_gem_object_put(obj);
+
+ return ret;
}
ret = drm_gem_shmem_get_pages(shmem);
diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c
index 5522d61..b1a38e6 100644
--- a/drivers/gpu/drm/drm_panel_orientation_quirks.c
+++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c
@@ -328,10 +328,17 @@ static const struct dmi_system_id orientation_data[] = {
DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "IdeaPad Duet 3 10IGL5"),
},
.driver_data = (void *)&lcd1200x1920_rightside_up,
- }, { /* Lenovo Yoga Book X90F / X91F / X91L */
+ }, { /* Lenovo Yoga Book X90F / X90L */
.matches = {
- /* Non exact match to match all versions */
- DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9"),
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "YETI-11"),
+ },
+ .driver_data = (void *)&lcd1200x1920_rightside_up,
+ }, { /* Lenovo Yoga Book X91F / X91L */
+ .matches = {
+ /* Non exact match to match F + L versions */
+ DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X91"),
},
.driver_data = (void *)&lcd1200x1920_rightside_up,
}, { /* Lenovo Yoga Tablet 2 830F / 830L */
diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c
index 82be0fb..d5b5d40 100644
--- a/drivers/gpu/drm/i915/display/intel_crtc.c
+++ b/drivers/gpu/drm/i915/display/intel_crtc.c
@@ -683,6 +683,14 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state)
*/
intel_vrr_send_push(new_crtc_state);
+ /*
+ * Seamless M/N update may need to update frame timings.
+ *
+ * FIXME Should be synchronized with the start of vblank somehow...
+ */
+ if (new_crtc_state->seamless_m_n && intel_crtc_needs_fastset(new_crtc_state))
+ intel_crtc_update_active_timings(new_crtc_state);
+
local_irq_enable();
if (intel_vgpu_active(dev_priv))
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index d3994e2..208b1b5 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -5145,6 +5145,7 @@ intel_crtc_prepare_cleared_state(struct intel_atomic_state *state,
* only fields that are know to not cause problems are preserved. */
saved_state->uapi = crtc_state->uapi;
+ saved_state->inherited = crtc_state->inherited;
saved_state->scaler_state = crtc_state->scaler_state;
saved_state->shared_dpll = crtc_state->shared_dpll;
saved_state->dpll_hw_state = crtc_state->dpll_hw_state;
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 54c517c..582234f 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -1631,6 +1631,8 @@ struct intel_psr {
bool psr2_sel_fetch_cff_enabled;
bool req_psr2_sdp_prior_scanline;
u8 sink_sync_latency;
+ u8 io_wake_lines;
+ u8 fast_wake_lines;
ktime_t last_entry_attempt;
ktime_t last_exit;
bool sink_not_reliable;
diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c
index 257aa2b..3485d5e 100644
--- a/drivers/gpu/drm/i915/display/intel_dmc.c
+++ b/drivers/gpu/drm/i915/display/intel_dmc.c
@@ -384,15 +384,12 @@ static void disable_all_event_handlers(struct drm_i915_private *i915)
}
}
-static void pipedmc_clock_gating_wa(struct drm_i915_private *i915, bool enable)
+static void adlp_pipedmc_clock_gating_wa(struct drm_i915_private *i915, bool enable)
{
enum pipe pipe;
- if (DISPLAY_VER(i915) < 13)
- return;
-
/*
- * Wa_16015201720:adl-p,dg2, mtl
+ * Wa_16015201720:adl-p,dg2
* The WA requires clock gating to be disabled all the time
* for pipe A and B.
* For pipe C and D clock gating needs to be disabled only
@@ -408,6 +405,25 @@ static void pipedmc_clock_gating_wa(struct drm_i915_private *i915, bool enable)
PIPEDMC_GATING_DIS, 0);
}
+static void mtl_pipedmc_clock_gating_wa(struct drm_i915_private *i915)
+{
+ /*
+ * Wa_16015201720
+ * The WA requires clock gating to be disabled all the time
+ * for pipe A and B.
+ */
+ intel_de_rmw(i915, GEN9_CLKGATE_DIS_0, 0,
+ MTL_PIPEDMC_GATING_DIS_A | MTL_PIPEDMC_GATING_DIS_B);
+}
+
+static void pipedmc_clock_gating_wa(struct drm_i915_private *i915, bool enable)
+{
+ if (DISPLAY_VER(i915) >= 14 && enable)
+ mtl_pipedmc_clock_gating_wa(i915);
+ else if (DISPLAY_VER(i915) == 13)
+ adlp_pipedmc_clock_gating_wa(i915, enable);
+}
+
void intel_dmc_enable_pipe(struct drm_i915_private *i915, enum pipe pipe)
{
if (!has_dmc_id_fw(i915, PIPE_TO_DMC_ID(pipe)))
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 054a009e..2106b3d 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -265,6 +265,19 @@ static int intel_dp_mst_update_slots(struct intel_encoder *encoder,
return 0;
}
+static bool intel_dp_mst_has_audio(const struct drm_connector_state *conn_state)
+{
+ const struct intel_digital_connector_state *intel_conn_state =
+ to_intel_digital_connector_state(conn_state);
+ struct intel_connector *connector =
+ to_intel_connector(conn_state->connector);
+
+ if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO)
+ return connector->port->has_audio;
+ else
+ return intel_conn_state->force_audio == HDMI_AUDIO_ON;
+}
+
static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
struct intel_crtc_state *pipe_config,
struct drm_connector_state *conn_state)
@@ -272,10 +285,6 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
struct intel_dp *intel_dp = &intel_mst->primary->dp;
- struct intel_connector *connector =
- to_intel_connector(conn_state->connector);
- struct intel_digital_connector_state *intel_conn_state =
- to_intel_digital_connector_state(conn_state);
const struct drm_display_mode *adjusted_mode =
&pipe_config->hw.adjusted_mode;
struct link_config_limits limits;
@@ -287,11 +296,9 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB;
pipe_config->has_pch_encoder = false;
- if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO)
- pipe_config->has_audio = connector->port->has_audio;
- else
- pipe_config->has_audio =
- intel_conn_state->force_audio == HDMI_AUDIO_ON;
+ pipe_config->has_audio =
+ intel_dp_mst_has_audio(conn_state) &&
+ intel_audio_compute_config(encoder, pipe_config, conn_state);
/*
* for MST we always configure max link bw - the spec doesn't
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index f76b062..38825b3 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -210,6 +210,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
bool prealloc = false;
void __iomem *vaddr;
struct drm_i915_gem_object *obj;
+ struct i915_gem_ww_ctx ww;
int ret;
mutex_lock(&ifbdev->hpd_lock);
@@ -283,13 +284,24 @@ static int intelfb_create(struct drm_fb_helper *helper,
info->fix.smem_len = vma->size;
}
- vaddr = i915_vma_pin_iomap(vma);
- if (IS_ERR(vaddr)) {
- drm_err(&dev_priv->drm,
- "Failed to remap framebuffer into virtual memory (%pe)\n", vaddr);
- ret = PTR_ERR(vaddr);
- goto out_unpin;
+ for_i915_gem_ww(&ww, ret, false) {
+ ret = i915_gem_object_lock(vma->obj, &ww);
+
+ if (ret)
+ continue;
+
+ vaddr = i915_vma_pin_iomap(vma);
+ if (IS_ERR(vaddr)) {
+ drm_err(&dev_priv->drm,
+ "Failed to remap framebuffer into virtual memory (%pe)\n", vaddr);
+ ret = PTR_ERR(vaddr);
+ continue;
+ }
}
+
+ if (ret)
+ goto out_unpin;
+
info->screen_base = vaddr;
info->screen_size = vma->size;
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 7a72e15..9f1a0be 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -542,6 +542,14 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp)
val |= EDP_PSR2_FRAME_BEFORE_SU(max_t(u8, intel_dp->psr.sink_sync_latency + 1, 2));
val |= intel_psr2_get_tp_time(intel_dp);
+ if (DISPLAY_VER(dev_priv) >= 12) {
+ if (intel_dp->psr.io_wake_lines < 9 &&
+ intel_dp->psr.fast_wake_lines < 9)
+ val |= TGL_EDP_PSR2_BLOCK_COUNT_NUM_2;
+ else
+ val |= TGL_EDP_PSR2_BLOCK_COUNT_NUM_3;
+ }
+
/* Wa_22012278275:adl-p */
if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_E0)) {
static const u8 map[] = {
@@ -558,31 +566,21 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp)
* Still using the default IO_BUFFER_WAKE and FAST_WAKE, see
* comments bellow for more information
*/
- u32 tmp, lines = 7;
+ u32 tmp;
- val |= TGL_EDP_PSR2_BLOCK_COUNT_NUM_2;
-
- tmp = map[lines - TGL_EDP_PSR2_IO_BUFFER_WAKE_MIN_LINES];
+ tmp = map[intel_dp->psr.io_wake_lines - TGL_EDP_PSR2_IO_BUFFER_WAKE_MIN_LINES];
tmp = tmp << TGL_EDP_PSR2_IO_BUFFER_WAKE_SHIFT;
val |= tmp;
- tmp = map[lines - TGL_EDP_PSR2_FAST_WAKE_MIN_LINES];
+ tmp = map[intel_dp->psr.fast_wake_lines - TGL_EDP_PSR2_FAST_WAKE_MIN_LINES];
tmp = tmp << TGL_EDP_PSR2_FAST_WAKE_MIN_SHIFT;
val |= tmp;
} else if (DISPLAY_VER(dev_priv) >= 12) {
- /*
- * TODO: 7 lines of IO_BUFFER_WAKE and FAST_WAKE are default
- * values from BSpec. In order to setting an optimal power
- * consumption, lower than 4k resolution mode needs to decrease
- * IO_BUFFER_WAKE and FAST_WAKE. And higher than 4K resolution
- * mode needs to increase IO_BUFFER_WAKE and FAST_WAKE.
- */
- val |= TGL_EDP_PSR2_BLOCK_COUNT_NUM_2;
- val |= TGL_EDP_PSR2_IO_BUFFER_WAKE(7);
- val |= TGL_EDP_PSR2_FAST_WAKE(7);
+ val |= TGL_EDP_PSR2_IO_BUFFER_WAKE(intel_dp->psr.io_wake_lines);
+ val |= TGL_EDP_PSR2_FAST_WAKE(intel_dp->psr.fast_wake_lines);
} else if (DISPLAY_VER(dev_priv) >= 9) {
- val |= EDP_PSR2_IO_BUFFER_WAKE(7);
- val |= EDP_PSR2_FAST_WAKE(7);
+ val |= EDP_PSR2_IO_BUFFER_WAKE(intel_dp->psr.io_wake_lines);
+ val |= EDP_PSR2_FAST_WAKE(intel_dp->psr.fast_wake_lines);
}
if (intel_dp->psr.req_psr2_sdp_prior_scanline)
@@ -842,6 +840,46 @@ static bool _compute_psr2_sdp_prior_scanline_indication(struct intel_dp *intel_d
return true;
}
+static bool _compute_psr2_wake_times(struct intel_dp *intel_dp,
+ struct intel_crtc_state *crtc_state)
+{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+ int io_wake_lines, io_wake_time, fast_wake_lines, fast_wake_time;
+ u8 max_wake_lines;
+
+ if (DISPLAY_VER(i915) >= 12) {
+ io_wake_time = 42;
+ /*
+ * According to Bspec it's 42us, but based on testing
+ * it is not enough -> use 45 us.
+ */
+ fast_wake_time = 45;
+ max_wake_lines = 12;
+ } else {
+ io_wake_time = 50;
+ fast_wake_time = 32;
+ max_wake_lines = 8;
+ }
+
+ io_wake_lines = intel_usecs_to_scanlines(
+ &crtc_state->uapi.adjusted_mode, io_wake_time);
+ fast_wake_lines = intel_usecs_to_scanlines(
+ &crtc_state->uapi.adjusted_mode, fast_wake_time);
+
+ if (io_wake_lines > max_wake_lines ||
+ fast_wake_lines > max_wake_lines)
+ return false;
+
+ if (i915->params.psr_safest_params)
+ io_wake_lines = fast_wake_lines = max_wake_lines;
+
+ /* According to Bspec lower limit should be set as 7 lines. */
+ intel_dp->psr.io_wake_lines = max(io_wake_lines, 7);
+ intel_dp->psr.fast_wake_lines = max(fast_wake_lines, 7);
+
+ return true;
+}
+
static bool intel_psr2_config_valid(struct intel_dp *intel_dp,
struct intel_crtc_state *crtc_state)
{
@@ -936,6 +974,12 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp,
return false;
}
+ if (!_compute_psr2_wake_times(intel_dp, crtc_state)) {
+ drm_dbg_kms(&dev_priv->drm,
+ "PSR2 not enabled, Unable to use long enough wake times\n");
+ return false;
+ }
+
if (HAS_PSR2_SEL_FETCH(dev_priv)) {
if (!intel_psr2_sel_fetch_config_valid(intel_dp, crtc_state) &&
!HAS_PSR_HW_TRACKING(dev_priv)) {
diff --git a/drivers/gpu/drm/i915/display/intel_snps_phy.c b/drivers/gpu/drm/i915/display/intel_snps_phy.c
index c65c771..1cfb94b 100644
--- a/drivers/gpu/drm/i915/display/intel_snps_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_snps_phy.c
@@ -1419,6 +1419,36 @@ static const struct intel_mpllb_state dg2_hdmi_262750 = {
REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
};
+static const struct intel_mpllb_state dg2_hdmi_267300 = {
+ .clock = 267300,
+ .ref_control =
+ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
+ .mpllb_cp =
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 7) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
+ .mpllb_div =
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 1) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3),
+ .mpllb_div2 =
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 74) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
+ .mpllb_fracn1 =
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
+ .mpllb_fracn2 =
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 30146) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 36699),
+ .mpllb_sscen =
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
+};
+
static const struct intel_mpllb_state dg2_hdmi_268500 = {
.clock = 268500,
.ref_control =
@@ -1509,6 +1539,36 @@ static const struct intel_mpllb_state dg2_hdmi_241500 = {
REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
};
+static const struct intel_mpllb_state dg2_hdmi_319890 = {
+ .clock = 319890,
+ .ref_control =
+ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
+ .mpllb_cp =
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
+ .mpllb_div =
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 1) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 2),
+ .mpllb_div2 =
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 94) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
+ .mpllb_fracn1 =
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
+ .mpllb_fracn2 =
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 64094) |
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 13631),
+ .mpllb_sscen =
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
+};
+
static const struct intel_mpllb_state dg2_hdmi_497750 = {
.clock = 497750,
.ref_control =
@@ -1696,8 +1756,10 @@ static const struct intel_mpllb_state * const dg2_hdmi_tables[] = {
&dg2_hdmi_209800,
&dg2_hdmi_241500,
&dg2_hdmi_262750,
+ &dg2_hdmi_267300,
&dg2_hdmi_268500,
&dg2_hdmi_296703,
+ &dg2_hdmi_319890,
&dg2_hdmi_497750,
&dg2_hdmi_592000,
&dg2_hdmi_593407,
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index f0dbfc4..40d357c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -737,12 +737,12 @@ int intel_gt_init(struct intel_gt *gt)
if (err)
goto err_gt;
- intel_uc_init_late(>->uc);
-
err = i915_inject_probe_error(gt->i915, -EIO);
if (err)
goto err_gt;
+ intel_uc_init_late(>->uc);
+
intel_migrate_init(>->migrate, gt);
goto out_fw;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index cef3d6f..56b993f6 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -21,31 +21,10 @@
#include "intel_rc6.h"
#include "intel_rps.h"
#include "intel_wakeref.h"
-#include "intel_pcode.h"
#include "pxp/intel_pxp_pm.h"
#define I915_GT_SUSPEND_IDLE_TIMEOUT (HZ / 2)
-static void mtl_media_busy(struct intel_gt *gt)
-{
- /* Wa_14017073508: mtl */
- if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) &&
- gt->type == GT_MEDIA)
- snb_pcode_write_p(gt->uncore, PCODE_MBOX_GT_STATE,
- PCODE_MBOX_GT_STATE_MEDIA_BUSY,
- PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0);
-}
-
-static void mtl_media_idle(struct intel_gt *gt)
-{
- /* Wa_14017073508: mtl */
- if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) &&
- gt->type == GT_MEDIA)
- snb_pcode_write_p(gt->uncore, PCODE_MBOX_GT_STATE,
- PCODE_MBOX_GT_STATE_MEDIA_NOT_BUSY,
- PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0);
-}
-
static void user_forcewake(struct intel_gt *gt, bool suspend)
{
 int count = atomic_read(&gt->user_wakeref);
@@ -93,9 +72,6 @@ static int __gt_unpark(struct intel_wakeref *wf)
GT_TRACE(gt, "\n");
- /* Wa_14017073508: mtl */
- mtl_media_busy(gt);
-
/*
* It seems that the DMC likes to transition between the DC states a lot
* when there are no connected displays (no active power domains) during
@@ -145,9 +121,6 @@ static int __gt_park(struct intel_wakeref *wf)
GEM_BUG_ON(!wakeref);
intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref);
- /* Wa_14017073508: mtl */
- mtl_media_idle(gt);
-
return 0;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 83df4cd..80dbbef 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -580,7 +580,7 @@ static bool perf_limit_reasons_eval(void *data)
}
DEFINE_SIMPLE_ATTRIBUTE(perf_limit_reasons_fops, perf_limit_reasons_get,
- perf_limit_reasons_clear, "%llu\n");
+ perf_limit_reasons_clear, "0x%llx\n");
void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root)
{
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 5c91622d..f4150f6 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -486,6 +486,7 @@ static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
static bool rc6_supported(struct intel_rc6 *rc6)
{
struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ struct intel_gt *gt = rc6_to_gt(rc6);
if (!HAS_RC6(i915))
return false;
@@ -502,6 +503,13 @@ static bool rc6_supported(struct intel_rc6 *rc6)
return false;
}
+ if (IS_MTL_MEDIA_STEP(gt->i915, STEP_A0, STEP_B0) &&
+ gt->type == GT_MEDIA) {
+ drm_notice(&i915->drm,
+ "Media RC6 disabled on A step\n");
+ return false;
+ }
+
return true;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index aa87d38..d7e8c37 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -27,7 +27,7 @@ struct drm_printer;
* is only relevant to pre-Xe_HP platforms (Xe_HP and beyond use the
* I915_MAX_SS_FUSE_BITS value below).
*/
-#define GEN_MAX_SS_PER_HSW_SLICE 6
+#define GEN_MAX_SS_PER_HSW_SLICE 8
/*
* Maximum number of 32-bit registers used by hardware to express the
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
index fc3b994..710999d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
@@ -1571,6 +1571,27 @@ int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *ebuf,
#endif //CONFIG_DRM_I915_CAPTURE_ERROR
+static void guc_capture_find_ecode(struct intel_engine_coredump *ee)
+{
+ struct gcap_reg_list_info *reginfo;
+ struct guc_mmio_reg *regs;
+ i915_reg_t reg_ipehr = RING_IPEHR(0);
+ i915_reg_t reg_instdone = RING_INSTDONE(0);
+ int i;
+
+ if (!ee->guc_capture_node)
+ return;
+
+ reginfo = ee->guc_capture_node->reginfo + GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE;
+ regs = reginfo->regs;
+ for (i = 0; i < reginfo->num_regs; i++) {
+ if (regs[i].offset == reg_ipehr.reg)
+ ee->ipehr = regs[i].value;
+ else if (regs[i].offset == reg_instdone.reg)
+ ee->instdone.instdone = regs[i].value;
+ }
+}
+
void intel_guc_capture_free_node(struct intel_engine_coredump *ee)
{
if (!ee || !ee->guc_capture_node)
@@ -1612,6 +1633,7 @@ void intel_guc_capture_get_matching_node(struct intel_gt *gt,
list_del(&n->link);
ee->guc_capture_node = n;
ee->guc_capture = guc->capture;
+ guc_capture_find_ecode(ee);
return;
}
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
index b585509..8f8dd05 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
@@ -11,20 +11,9 @@
static bool __guc_rc_supported(struct intel_guc *guc)
{
- struct intel_gt *gt = guc_to_gt(guc);
-
- /*
- * Wa_14017073508: mtl
- * Do not enable gucrc to avoid additional interrupts which
- * may disrupt pcode wa.
- */
- if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) &&
- gt->type == GT_MEDIA)
- return false;
-
/* GuC RC is unavailable for pre-Gen12 */
return guc->submission_supported &&
- GRAPHICS_VER(gt->i915) >= 12;
+ GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12;
}
static bool __guc_rc_selected(struct intel_guc *guc)
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 7412abf..8ef9388 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -92,8 +92,7 @@ static void debug_active_init(struct i915_active *ref)
static void debug_active_activate(struct i915_active *ref)
{
lockdep_assert_held(&ref->tree_lock);
- if (!atomic_read(&ref->count)) /* before the first inc */
- debug_object_activate(ref, &active_debug_desc);
+ debug_object_activate(ref, &active_debug_desc);
}
static void debug_active_deactivate(struct i915_active *ref)
@@ -422,12 +421,12 @@ replace_barrier(struct i915_active *ref, struct i915_active_fence *active)
 * we can use it to substitute for the pending idle-barrier
* request that we want to emit on the kernel_context.
*/
- __active_del_barrier(ref, node_from_active(active));
- return true;
+ return __active_del_barrier(ref, node_from_active(active));
}
int i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
{
+ u64 idx = i915_request_timeline(rq)->fence_context;
struct dma_fence *fence = &rq->fence;
struct i915_active_fence *active;
int err;
@@ -437,16 +436,19 @@ int i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
if (err)
return err;
- active = active_instance(ref, i915_request_timeline(rq)->fence_context);
- if (!active) {
- err = -ENOMEM;
- goto out;
- }
+ do {
+ active = active_instance(ref, idx);
+ if (!active) {
+ err = -ENOMEM;
+ goto out;
+ }
- if (replace_barrier(ref, active)) {
- RCU_INIT_POINTER(active->fence, NULL);
- atomic_dec(&ref->count);
- }
+ if (replace_barrier(ref, active)) {
+ RCU_INIT_POINTER(active->fence, NULL);
+ atomic_dec(&ref->count);
+ }
+ } while (unlikely(is_barrier(active)));
+
if (!__i915_active_fence_set(active, fence))
__i915_active_acquire(ref);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 3b26423..747b53b 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1786,9 +1786,11 @@
* GEN9 clock gating regs
*/
#define GEN9_CLKGATE_DIS_0 _MMIO(0x46530)
-#define DARBF_GATING_DIS (1 << 27)
-#define PWM2_GATING_DIS (1 << 14)
-#define PWM1_GATING_DIS (1 << 13)
+#define DARBF_GATING_DIS REG_BIT(27)
+#define MTL_PIPEDMC_GATING_DIS_A REG_BIT(15)
+#define MTL_PIPEDMC_GATING_DIS_B REG_BIT(14)
+#define PWM2_GATING_DIS REG_BIT(14)
+#define PWM1_GATING_DIS REG_BIT(13)
#define GEN9_CLKGATE_DIS_3 _MMIO(0x46538)
#define TGL_VRH_GATING_DIS REG_BIT(31)
@@ -6596,15 +6598,6 @@
/* XEHP_PCODE_FREQUENCY_CONFIG param2 */
#define PCODE_MBOX_DOMAIN_NONE 0x0
#define PCODE_MBOX_DOMAIN_MEDIAFF 0x3
-
-/* Wa_14017210380: mtl */
-#define PCODE_MBOX_GT_STATE 0x50
-/* sub-commands (param1) */
-#define PCODE_MBOX_GT_STATE_MEDIA_BUSY 0x1
-#define PCODE_MBOX_GT_STATE_MEDIA_NOT_BUSY 0x2
-/* param2 */
-#define PCODE_MBOX_GT_STATE_DOMAIN_MEDIA 0x1
-
#define GEN6_PCODE_DATA _MMIO(0x138128)
#define GEN6_PCODE_FREQ_IA_RATIO_SHIFT 8
#define GEN6_PCODE_FREQ_RING_RATIO_SHIFT 16
diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c
index 79bfe39..7caf937 100644
--- a/drivers/gpu/drm/meson/meson_drv.c
+++ b/drivers/gpu/drm/meson/meson_drv.c
@@ -325,23 +325,23 @@ static int meson_drv_bind_master(struct device *dev, bool has_components)
ret = meson_encoder_hdmi_init(priv);
if (ret)
- goto exit_afbcd;
+ goto unbind_all;
ret = meson_plane_create(priv);
if (ret)
- goto exit_afbcd;
+ goto unbind_all;
ret = meson_overlay_create(priv);
if (ret)
- goto exit_afbcd;
+ goto unbind_all;
ret = meson_crtc_create(priv);
if (ret)
- goto exit_afbcd;
+ goto unbind_all;
ret = request_irq(priv->vsync_irq, meson_irq, 0, drm->driver->name, drm);
if (ret)
- goto exit_afbcd;
+ goto unbind_all;
drm_mode_config_reset(drm);
@@ -359,6 +359,9 @@ static int meson_drv_bind_master(struct device *dev, bool has_components)
uninstall_irq:
free_irq(priv->vsync_irq, drm);
+unbind_all:
+ if (has_components)
+ component_unbind_all(drm->dev, drm);
exit_afbcd:
if (priv->afbcd.ops)
priv->afbcd.ops->exit(priv);
diff --git a/drivers/gpu/drm/meson/meson_dw_hdmi.c b/drivers/gpu/drm/meson/meson_dw_hdmi.c
index 534621a..3d04687 100644
--- a/drivers/gpu/drm/meson/meson_dw_hdmi.c
+++ b/drivers/gpu/drm/meson/meson_dw_hdmi.c
@@ -718,7 +718,7 @@ static int meson_dw_hdmi_bind(struct device *dev, struct device *master,
dw_plat_data = &meson_dw_hdmi->dw_plat_data;
ret = devm_regulator_get_enable_optional(dev, "hdmi");
- if (ret < 0)
+ if (ret < 0 && ret != -ENODEV)
return ret;
meson_dw_hdmi->hdmitx_apb = devm_reset_control_get_exclusive(dev,
diff --git a/drivers/gpu/drm/meson/meson_vpp.c b/drivers/gpu/drm/meson/meson_vpp.c
index 1548376..5df1957 100644
--- a/drivers/gpu/drm/meson/meson_vpp.c
+++ b/drivers/gpu/drm/meson/meson_vpp.c
@@ -100,6 +100,8 @@ void meson_vpp_init(struct meson_drm *priv)
priv->io_base + _REG(VPP_DOLBY_CTRL));
writel_relaxed(0x1020080,
priv->io_base + _REG(VPP_DUMMY_DATA1));
+ writel_relaxed(0x42020,
+ priv->io_base + _REG(VPP_DUMMY_DATA));
} else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
writel_relaxed(0xf, priv->io_base + _REG(DOLBY_PATH_CTRL));
diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c
index 051bdbc..f38296a 100644
--- a/drivers/gpu/drm/msm/msm_gem_shrinker.c
+++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c
@@ -107,6 +107,7 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
bool (*shrink)(struct drm_gem_object *obj);
bool cond;
unsigned long freed;
+ unsigned long remaining;
} stages[] = {
/* Stages of progressively more aggressive/expensive reclaim: */
{ &priv->lru.dontneed, purge, true },
@@ -116,14 +117,18 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
};
long nr = sc->nr_to_scan;
unsigned long freed = 0;
+ unsigned long remaining = 0;
for (unsigned i = 0; (nr > 0) && (i < ARRAY_SIZE(stages)); i++) {
if (!stages[i].cond)
continue;
stages[i].freed =
- drm_gem_lru_scan(stages[i].lru, nr, stages[i].shrink);
+ drm_gem_lru_scan(stages[i].lru, nr,
+ &stages[i].remaining,
+ stages[i].shrink);
nr -= stages[i].freed;
freed += stages[i].freed;
+ remaining += stages[i].remaining;
}
if (freed) {
@@ -132,7 +137,7 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
stages[3].freed);
}
- return (freed > 0) ? freed : SHRINK_STOP;
+ return (freed > 0 && remaining > 0) ? freed : SHRINK_STOP;
}
#ifdef CONFIG_DEBUG_FS
@@ -182,10 +187,12 @@ msm_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
NULL,
};
unsigned idx, unmapped = 0;
+ unsigned long remaining = 0;
for (idx = 0; lrus[idx] && unmapped < vmap_shrink_limit; idx++) {
unmapped += drm_gem_lru_scan(lrus[idx],
vmap_shrink_limit - unmapped,
+ &remaining,
vmap_shrink);
}
diff --git