// SPDX-License-Identifier: MIT
/*
 * Copyright © 2024 Intel Corporation
 */

#include <linux/scatterlist.h>
#include <linux/mmu_notifier.h>
#include <linux/dma-mapping.h>
#include <linux/memremap.h>
#include <linux/swap.h>
#include <linux/hmm.h>
#include <linux/mm.h>
#include "xe_hmm.h"
#include "xe_vm.h"
#include "xe_bo.h"

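/* Number of PAGE_SIZE pages spanned by the virtual range [start, end). */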
static u64 xe_npages_in_range(unsigned long start, unsigned long end)
{
	return (end - start) >> PAGE_SHIFT;
}

static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st,
		       struct hmm_range *range, struct rw_semaphore *notifier_sem)
{
	unsigned long i, npages, hmm_pfn;
	unsigned long num_chunks = 0;
	int ret;

	/* The HMM docs say this is needed. */
	ret = down_read_interruptible(notifier_sem);
	if (ret)
		return ret;

	if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) {
		up_read(notifier_sem);
		return -EAGAIN;
	}

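	/*
	 * Count the number of sg chunks needed: contiguous pfns covered by a
	 * single higher-order mapping are collapsed into one chunk.
	 */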
	npages = xe_npages_in_range(range->start, range->end);
	for (i = 0; i < npages;) {
		unsigned long len;

		hmm_pfn = range->hmm_pfns[i];
		xe_assert(xe, hmm_pfn & HMM_PFN_VALID);

		len = 1UL << hmm_pfn_to_map_order(hmm_pfn);

		/* If order > 0 the page may extend beyond range->start */
		len -= (hmm_pfn & ~HMM_PFN_FLAGS) & (len - 1);
		i += len;
		num_chunks++;
	}
	up_read(notifier_sem);

	return sg_alloc_table(st, num_chunks, GFP_KERNEL);
}

/**
 * xe_build_sg() - build a scatter gather table for all the physical pages/pfn
 * in a hmm_range. dma-map pages if necessary. The dma-address is saved in the
 * sg table and will be used to program the GPU page table later.
 * @xe: the xe device that will access the dma-address in the sg table
 * @range: the hmm range that we build the sg table from. range->hmm_pfns[]
 * has the pfn numbers of the pages that back this hmm address range.
 * @st: pointer to the sg table.
 * @notifier_sem: The xe notifier lock.
 * @write: whether we write to this range. This decides the dma map direction
 * for system pages. If write we map it bi-directional; otherwise
 * DMA_TO_DEVICE
 *
 * All the contiguous pfns will be collapsed into one entry in
 * the scatter gather table. This is for the purpose of efficiently
 * programming the GPU page table.
 *
 * The dma_address in the sg table will later be used by the GPU to
 * access memory. So if the memory is system memory, we need to
 * do a dma-mapping so it can be accessed by the GPU/DMA.
 *
 * FIXME: This function currently only supports pages in system
 * memory. If the memory is GPU local memory (of the GPU that
 * is going to access the memory), we need the gpu dpa (device physical
 * address), and there is no need of dma-mapping. This is TBD.
 *
 * FIXME: dma-mapping for a peer gpu device to access a remote gpu's
 * memory. Add this when p2p is supported.
 *
 * The sg table storage must already have been allocated, e.g. by
 * xe_alloc_sg(). It is the caller's responsibility to free it by calling
 * sg_free_table().
 *
 * Return: 0 if successful; a negative error code on failure
 */
static int xe_build_sg(struct xe_device *xe, struct hmm_range *range,
		       struct sg_table *st,
		       struct rw_semaphore *notifier_sem,
		       bool write)
{
	unsigned long npages = xe_npages_in_range(range->start, range->end);
	struct device *dev = xe->drm.dev;
	struct scatterlist *sgl;
	struct page *page;
	unsigned long i, j;

	lockdep_assert_held(notifier_sem);

	i = 0;
	for_each_sg(st->sgl, sgl, st->nents, j) {
		unsigned long hmm_pfn, size;

		hmm_pfn = range->hmm_pfns[i];
		page = hmm_pfn_to_page(hmm_pfn);
		xe_assert(xe, !is_device_private_page(page));

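		/*
		 * Number of pages covered by this (possibly higher-order)
		 * mapping, counted from this page to the end of the mapping.
		 */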
		size = 1UL << hmm_pfn_to_map_order(hmm_pfn);
		size -= page_to_pfn(page) & (size - 1);
		i += size;

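		/*
		 * The last chunk may extend past the end of the range; trim
		 * it to the remaining pages and terminate the sg table.
		 */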
		if (unlikely(j == st->nents - 1)) {
			xe_assert(xe, i >= npages);
			if (i > npages)
				size -= (i - npages);

			sg_mark_end(sgl);
		} else {
			xe_assert(xe, i < npages);
		}

		sg_set_page(sgl, page, size << PAGE_SHIFT, 0);
	}

	return dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE,
			       DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING);
}

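/*
 * Mark the userptr as dma-mapped. Serialized against xe_hmm_userptr_unmap()
 * by the per-userptr unmap_mutex.
 */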
static void xe_hmm_userptr_set_mapped(struct xe_userptr_vma *uvma)
{
	struct xe_userptr *userptr = &uvma->userptr;
	struct xe_vm *vm = xe_vma_vm(&uvma->vma);

	lockdep_assert_held_write(&vm->lock);
	lockdep_assert_held(&vm->userptr.notifier_lock);

	mutex_lock(&userptr->unmap_mutex);
	xe_assert(vm->xe, !userptr->mapped);
	userptr->mapped = true;
	mutex_unlock(&userptr->unmap_mutex);
}

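/**
 * xe_hmm_userptr_unmap() - dma-unmap a userptr's scatter gather table
 * @uvma: the userptr vma whose sg table is to be unmapped
 *
 * If the userptr sg table is currently dma-mapped, unmap it and mark the
 * userptr as unmapped. Serialized against xe_hmm_userptr_set_mapped() by
 * the per-userptr unmap_mutex.
 */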
void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma)
{
	struct xe_userptr *userptr = &uvma->userptr;
	struct xe_vma *vma = &uvma->vma;
	bool write = !xe_vma_read_only(vma);
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_device *xe = vm->xe;

	if (!lockdep_is_held_type(&vm->userptr.notifier_lock, 0) &&
	    !lockdep_is_held_type(&vm->lock, 0) &&
	    !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
		/* Don't unmap in exec critical section. */
		xe_vm_assert_held(vm);
		/* Don't unmap while mapping the sg. */
		lockdep_assert_held(&vm->lock);
	}

	mutex_lock(&userptr->unmap_mutex);
	if (userptr->sg && userptr->mapped)
		dma_unmap_sgtable(xe->drm.dev, userptr->sg,
				  write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0);
	userptr->mapped = false;
	mutex_unlock(&userptr->unmap_mutex);
}

/**
 * xe_hmm_userptr_free_sg() - Free the scatter gather table of a userptr
 * @uvma: the userptr vma which holds the scatter gather table
 *
 * With xe_hmm_userptr_populate_range(), we allocate the storage of
 * the userptr sg table. This is a helper function to free that
 * sg table and dma-unmap the addresses in it.
 */
void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma)
{
	struct xe_userptr *userptr = &uvma->userptr;

	xe_assert(xe_vma_vm(&uvma->vma)->xe, userptr->sg);
	xe_hmm_userptr_unmap(uvma);
	sg_free_table(userptr->sg);
	userptr->sg = NULL;
}

/**
 * xe_hmm_userptr_populate_range() - Populate physical pages of a virtual
 * address range
 *
 * @uvma: userptr vma which has information of the range to populate.
 * @is_mm_mmap_locked: True if the mmap read lock is already acquired by the
 * caller.
 *
 * This function populates the physical pages of a virtual
 * address range. The populated physical pages are saved in the
 * userptr's sg table. It is similar to get_user_pages() but calls
 * hmm_range_fault() instead.
 *
 * This function also reads the mmu notifier sequence number
 * (mmu_interval_read_begin()), for the purpose of later
 * comparison (through mmu_interval_read_retry()).
 *
 * If @is_mm_mmap_locked is true, the caller must already hold the mmap
 * read or write lock; otherwise this function takes the mmap read lock
 * itself around hmm_range_fault().
 *
 * This function allocates the storage of the userptr sg table.
 * It is the caller's responsibility to free it by calling sg_free_table().
 *
 * Return: 0 for success; a negative error code on failure
 */
int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma,
				  bool is_mm_mmap_locked)
{
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	unsigned long *pfns;
	struct xe_userptr *userptr;
	struct xe_vma *vma = &uvma->vma;
	u64 userptr_start = xe_vma_userptr(vma);
	u64 userptr_end = userptr_start + xe_vma_size(vma);
	struct xe_vm *vm = xe_vma_vm(vma);
	struct hmm_range hmm_range = {
		.pfn_flags_mask = 0, /* ignore pfns */
		.default_flags = HMM_PFN_REQ_FAULT,
		.start = userptr_start,
		.end = userptr_end,
		.notifier = &uvma->userptr.notifier,
		.dev_private_owner = vm->xe,
	};
	bool write = !xe_vma_read_only(vma);
	unsigned long notifier_seq;
	u64 npages;
	int ret;

	userptr = &uvma->userptr;

	if (is_mm_mmap_locked)
		mmap_assert_locked(userptr->notifier.mm);

	if (vma->gpuva.flags & XE_VMA_DESTROYED)
		return 0;

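	/*
	 * If the notifier sequence number is unchanged, the pages populated
	 * last time are still valid and there is nothing to do.
	 */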
	notifier_seq = mmu_interval_read_begin(&userptr->notifier);
	if (notifier_seq == userptr->notifier_seq)
		return 0;

	if (userptr->sg)
		xe_hmm_userptr_free_sg(uvma);

	npages = xe_npages_in_range(userptr_start, userptr_end);
	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
	if (unlikely(!pfns))
		return -ENOMEM;

	if (write)
		hmm_range.default_flags |= HMM_PFN_REQ_WRITE;

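	/* Take a reference on the mm so it cannot go away while we fault in pages. */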
	if (!mmget_not_zero(userptr->notifier.mm)) {
		ret = -EFAULT;
		goto free_pfns;
	}

	hmm_range.hmm_pfns = pfns;

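	/*
	 * hmm_range_fault() returns -EBUSY if the notifier sequence has
	 * changed in the meantime; retry until it succeeds or the timeout
	 * expires.
	 */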
	while (true) {
		hmm_range.notifier_seq = mmu_interval_read_begin(&userptr->notifier);

		if (!is_mm_mmap_locked)
			mmap_read_lock(userptr->notifier.mm);

		ret = hmm_range_fault(&hmm_range);

		if (!is_mm_mmap_locked)
			mmap_read_unlock(userptr->notifier.mm);

		if (ret == -EBUSY) {
			if (time_after(jiffies, timeout))
				break;

			continue;
		}
		break;
	}

	mmput(userptr->notifier.mm);

	if (ret)
		goto free_pfns;

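	/*
	 * Allocate the sg table sized for the collapsed chunks, then build
	 * and dma-map it under the notifier lock, rechecking the notifier
	 * sequence so that a concurrent invalidation is not missed.
	 */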
	ret = xe_alloc_sg(vm->xe, &userptr->sgt, &hmm_range, &vm->userptr.notifier_lock);
	if (ret)
		goto free_pfns;

	ret = down_read_interruptible(&vm->userptr.notifier_lock);
	if (ret)
		goto free_st;

	if (mmu_interval_read_retry(hmm_range.notifier, hmm_range.notifier_seq)) {
		ret = -EAGAIN;
		goto out_unlock;
	}

	ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt,
			  &vm->userptr.notifier_lock, write);
	if (ret)
		goto out_unlock;

	userptr->sg = &userptr->sgt;
	xe_hmm_userptr_set_mapped(uvma);
	userptr->notifier_seq = hmm_range.notifier_seq;
	up_read(&vm->userptr.notifier_lock);
	kvfree(pfns);
	return 0;

out_unlock:
	up_read(&vm->userptr.notifier_lock);
free_st:
	sg_free_table(&userptr->sgt);
free_pfns:
	kvfree(pfns);
	return ret;
}