Merge tag 'mm-hotfixes-stable-2022-11-24' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull hotfixes from Andrew Morton:
"24 MM and non-MM hotfixes. 8 marked cc:stable and 16 for post-6.0
issues.

There have been a lot of hotfixes this cycle, and this is quite a
large batch given how far we are into the -rc cycle. Presumably a
reflection of the unusually large amount of MM material which went
into 6.1-rc1"
* tag 'mm-hotfixes-stable-2022-11-24' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (24 commits)
test_kprobes: fix implicit declaration error of test_kprobes
nilfs2: fix nilfs_sufile_mark_dirty() not set segment usage as dirty
mm/cgroup/reclaim: fix dirty pages throttling on cgroup v1
mm: fix unexpected changes to {failslab|fail_page_alloc}.attr
swapfile: fix soft lockup in scan_swap_map_slots
hugetlb: fix __prep_compound_gigantic_page page flag setting
kfence: fix stack trace pruning
proc/meminfo: fix spacing in SecPageTables
mm: multi-gen LRU: retry folios written back while isolated
mailmap: update email address for Satya Priya
mm/migrate_device: return number of migrating pages in args->cpages
kbuild: fix -Wimplicit-function-declaration in license_is_gpl_compatible
MAINTAINERS: update Alex Hung's email address
mailmap: update Alex Hung's email address
mm: mmap: fix documentation for vma_mas_szero
mm/damon/sysfs-schemes: skip stats update if the scheme directory is removed
mm/memory: return vm_fault_t result from migrate_to_ram() callback
mm: correctly charge compressed memory to its memcg
ipc/shm: call underlying open/close vm_ops
gcov: clang: fix the buffer overflow issue
...
diff --git a/.mailmap b/.mailmap
index 406b99f..4a14ece 100644
--- a/.mailmap
+++ b/.mailmap
@@ -29,6 +29,7 @@
Alexei Starovoitov <ast@kernel.org> <alexei.starovoitov@gmail.com>
Alexei Starovoitov <ast@kernel.org> <ast@fb.com>
Alexei Starovoitov <ast@kernel.org> <ast@plumgrid.com>
+Alex Hung <alexhung@gmail.com> <alex.hung@canonical.com>
Alex Shi <alexs@kernel.org> <alex.shi@intel.com>
Alex Shi <alexs@kernel.org> <alex.shi@linaro.org>
Alex Shi <alexs@kernel.org> <alex.shi@linux.alibaba.com>
@@ -382,6 +383,7 @@
Santosh Shilimkar <ssantosh@kernel.org>
Sarangdhar Joshi <spjoshi@codeaurora.org>
Sascha Hauer <s.hauer@pengutronix.de>
+Satya Priya <quic_c_skakit@quicinc.com> <skakit@codeaurora.org>
S.Çağlar Onur <caglar@pardus.org.tr>
Sean Christopherson <seanjc@google.com> <sean.j.christopherson@intel.com>
Sean Nyekjaer <sean@geanix.com> <sean.nyekjaer@prevas.dk>
diff --git a/MAINTAINERS b/MAINTAINERS
index c962415..a1afc62 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10287,7 +10287,7 @@
F: drivers/gpu/drm/i915/gvt/
INTEL HID EVENT DRIVER
-M: Alex Hung <alex.hung@canonical.com>
+M: Alex Hung <alexhung@gmail.com>
L: platform-driver-x86@vger.kernel.org
S: Maintained
F: drivers/platform/x86/intel/hid.c
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 77ff8e9..dc359b5 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -495,14 +495,22 @@
int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
{
struct buffer_head *bh;
+ void *kaddr;
+ struct nilfs_segment_usage *su;
int ret;
+ down_write(&NILFS_MDT(sufile)->mi_sem);
ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
if (!ret) {
mark_buffer_dirty(bh);
nilfs_mdt_mark_dirty(sufile);
+ kaddr = kmap_atomic(bh->b_page);
+ su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
+ nilfs_segment_usage_set_dirty(su);
+ kunmap_atomic(kaddr);
brelse(bh);
}
+ up_write(&NILFS_MDT(sufile)->mi_sem);
return ret;
}
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 5101131..4409601 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -115,7 +115,7 @@
#endif
show_val_kb(m, "PageTables: ",
global_node_page_state(NR_PAGETABLE));
- show_val_kb(m, "SecPageTables: ",
+ show_val_kb(m, "SecPageTables: ",
global_node_page_state(NR_SECONDARY_PAGETABLE));
show_val_kb(m, "NFS_Unstable: ", 0);
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index 9f6e254..444236d 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -20,7 +20,6 @@
atomic_t space;
unsigned long verbose;
bool task_filter;
- bool no_warn;
unsigned long stacktrace_depth;
unsigned long require_start;
unsigned long require_end;
@@ -32,6 +31,10 @@
struct dentry *dname;
};
+enum fault_flags {
+ FAULT_NOWARN = 1 << 0,
+};
+
#define FAULT_ATTR_INITIALIZER { \
.interval = 1, \
.times = ATOMIC_INIT(1), \
@@ -40,11 +43,11 @@
.ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \
.verbose = 2, \
.dname = NULL, \
- .no_warn = false, \
}
#define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
int setup_fault_attr(struct fault_attr *attr, char *str);
+bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags);
bool should_fail(struct fault_attr *attr, ssize_t size);
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
diff --git a/include/linux/license.h b/include/linux/license.h
index 7cce390..ad937f5 100644
--- a/include/linux/license.h
+++ b/include/linux/license.h
@@ -2,6 +2,8 @@
#ifndef __LICENSE_H
#define __LICENSE_H
+#include <linux/string.h>
+
static inline int license_is_gpl_compatible(const char *license)
{
return (strcmp(license, "GPL") == 0
diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
index 935af49..760455d 100644
--- a/include/trace/events/huge_memory.h
+++ b/include/trace/events/huge_memory.h
@@ -171,15 +171,15 @@
TRACE_EVENT(mm_khugepaged_scan_file,
- TP_PROTO(struct mm_struct *mm, struct page *page, const char *filename,
+ TP_PROTO(struct mm_struct *mm, struct page *page, struct file *file,
int present, int swap, int result),
- TP_ARGS(mm, page, filename, present, swap, result),
+ TP_ARGS(mm, page, file, present, swap, result),
TP_STRUCT__entry(
__field(struct mm_struct *, mm)
__field(unsigned long, pfn)
- __string(filename, filename)
+ __string(filename, file->f_path.dentry->d_iname)
__field(int, present)
__field(int, swap)
__field(int, result)
@@ -188,7 +188,7 @@
TP_fast_assign(
__entry->mm = mm;
__entry->pfn = page ? page_to_pfn(page) : -1;
- __assign_str(filename, filename);
+ __assign_str(filename, file->f_path.dentry->d_iname);
__entry->present = present;
__entry->swap = swap;
__entry->result = result;
diff --git a/ipc/shm.c b/ipc/shm.c
index 7d86f05..bd2fcc4 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -275,10 +275,8 @@
}
-static int __shm_open(struct vm_area_struct *vma)
+static int __shm_open(struct shm_file_data *sfd)
{
- struct file *file = vma->vm_file;
- struct shm_file_data *sfd = shm_file_data(file);
struct shmid_kernel *shp;
shp = shm_lock(sfd->ns, sfd->id);
@@ -302,7 +300,15 @@
/* This is called by fork, once for every shm attach. */
static void shm_open(struct vm_area_struct *vma)
{
- int err = __shm_open(vma);
+ struct file *file = vma->vm_file;
+ struct shm_file_data *sfd = shm_file_data(file);
+ int err;
+
+ /* Always call underlying open if present */
+ if (sfd->vm_ops->open)
+ sfd->vm_ops->open(vma);
+
+ err = __shm_open(sfd);
/*
* We raced in the idr lookup or with shm_destroy().
* Either way, the ID is busted.
@@ -359,10 +365,8 @@
* The descriptor has already been removed from the current->mm->mmap list
* and will later be kfree()d.
*/
-static void shm_close(struct vm_area_struct *vma)
+static void __shm_close(struct shm_file_data *sfd)
{
- struct file *file = vma->vm_file;
- struct shm_file_data *sfd = shm_file_data(file);
struct shmid_kernel *shp;
struct ipc_namespace *ns = sfd->ns;
@@ -388,6 +392,18 @@
up_write(&shm_ids(ns).rwsem);
}
+static void shm_close(struct vm_area_struct *vma)
+{
+ struct file *file = vma->vm_file;
+ struct shm_file_data *sfd = shm_file_data(file);
+
+ /* Always call underlying close if present */
+ if (sfd->vm_ops->close)
+ sfd->vm_ops->close(vma);
+
+ __shm_close(sfd);
+}
+
/* Called with ns->shm_ids(ns).rwsem locked */
static int shm_try_destroy_orphaned(int id, void *p, void *data)
{
@@ -583,13 +599,13 @@
* IPC ID that was removed, and possibly even reused by another shm
* segment already. Propagate this case as an error to caller.
*/
- ret = __shm_open(vma);
+ ret = __shm_open(sfd);
if (ret)
return ret;
ret = call_mmap(sfd->file, vma);
if (ret) {
- shm_close(vma);
+ __shm_close(sfd);
return ret;
}
sfd->vm_ops = vma->vm_ops;
diff --git a/kernel/gcov/clang.c b/kernel/gcov/clang.c
index cbb0bed..7670a81 100644
--- a/kernel/gcov/clang.c
+++ b/kernel/gcov/clang.c
@@ -280,6 +280,8 @@
for (i = 0; i < sfn_ptr->num_counters; i++)
dfn_ptr->counters[i] += sfn_ptr->counters[i];
+
+ sfn_ptr = list_next_entry(sfn_ptr, head);
}
}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c3c0b07..a100541 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2107,6 +2107,7 @@
depends on DEBUG_KERNEL
depends on KPROBES
depends on KUNIT
+ select STACKTRACE if ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
default KUNIT_ALL_TESTS
help
This option provides for testing basic kprobes functionality on
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index 96e092d..adb2f93 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -41,9 +41,6 @@
static void fail_dump(struct fault_attr *attr)
{
- if (attr->no_warn)
- return;
-
if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
"name %pd, interval %lu, probability %lu, "
@@ -103,7 +100,7 @@
* http://www.nongnu.org/failmalloc/
*/
-bool should_fail(struct fault_attr *attr, ssize_t size)
+bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
{
if (in_task()) {
unsigned int fail_nth = READ_ONCE(current->fail_nth);
@@ -146,13 +143,19 @@
return false;
fail:
- fail_dump(attr);
+ if (!(flags & FAULT_NOWARN))
+ fail_dump(attr);
if (atomic_read(&attr->times) != -1)
atomic_dec_not_zero(&attr->times);
return true;
}
+
+bool should_fail(struct fault_attr *attr, ssize_t size)
+{
+ return should_fail_ex(attr, size, 0);
+}
EXPORT_SYMBOL_GPL(should_fail);
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index 9f1219a..5ce4033 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -2339,6 +2339,10 @@
damon_for_each_scheme(scheme, ctx) {
struct damon_sysfs_stats *sysfs_stats;
+ /* user could have removed the scheme sysfs dir */
+ if (schemes_idx >= sysfs_schemes->nr)
+ break;
+
sysfs_stats = sysfs_schemes->schemes_arr[schemes_idx++]->stats;
sysfs_stats->nr_tried = scheme->stat.nr_tried;
sysfs_stats->sz_tried = scheme->stat.sz_tried;
diff --git a/mm/failslab.c b/mm/failslab.c
index 58df978..ffc420c 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -16,6 +16,8 @@
bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
+ int flags = 0;
+
/* No fault-injection for bootstrap cache */
if (unlikely(s == kmem_cache))
return false;
@@ -30,10 +32,16 @@
if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
return false;
+ /*
+ * In some cases, it expects to specify __GFP_NOWARN
+ * to avoid printing any information(not just a warning),
+ * thus avoiding deadlocks. See commit 6b9dbedbe349 for
+ * details.
+ */
if (gfpflags & __GFP_NOWARN)
- failslab.attr.no_warn = true;
+ flags |= FAULT_NOWARN;
- return should_fail(&failslab.attr, s->object_size);
+ return should_fail_ex(&failslab.attr, s->object_size, flags);
}
static int __init setup_failslab(char *str)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e48f8ef..f1385c3 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1800,6 +1800,7 @@
/* we rely on prep_new_huge_page to set the destructor */
set_compound_order(page, order);
+ __ClearPageReserved(page);
__SetPageHead(page);
for (i = 0; i < nr_pages; i++) {
p = nth_page(page, i);
@@ -1816,7 +1817,8 @@
* on the head page when they need know if put_page() is needed
* after get_user_pages().
*/
- __ClearPageReserved(p);
+ if (i != 0) /* head page cleared above */
+ __ClearPageReserved(p);
/*
* Subtle and very unlikely
*
diff --git a/mm/kfence/report.c b/mm/kfence/report.c
index 7e49685..46ecea1 100644
--- a/mm/kfence/report.c
+++ b/mm/kfence/report.c
@@ -75,18 +75,23 @@
if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfence_") ||
str_has_prefix(buf, ARCH_FUNC_PREFIX "__kfence_") ||
+ str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmem_cache_free") ||
!strncmp(buf, ARCH_FUNC_PREFIX "__slab_free", len)) {
/*
- * In case of tail calls from any of the below
- * to any of the above.
+ * In case of tail calls from any of the below to any of
+ * the above, optimized by the compiler such that the
+ * stack trace would omit the initial entry point below.
*/
fallback = skipnr + 1;
}
- /* Also the *_bulk() variants by only checking prefixes. */
+ /*
+ * The below list should only include the initial entry points
+ * into the slab allocators. Includes the *_bulk() variants by
+ * checking prefixes.
+ */
if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfree") ||
str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_free") ||
- str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmem_cache_free") ||
str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmalloc") ||
str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_alloc"))
goto found;
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 4734315..a8d5ef2 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -97,8 +97,8 @@
/* Num pages scanned per node */
u32 node_load[MAX_NUMNODES];
- /* Last target selected in hpage_collapse_find_target_node() */
- int last_target_node;
+ /* nodemask for allocation fallback */
+ nodemask_t alloc_nmask;
};
/**
@@ -734,7 +734,6 @@
struct collapse_control khugepaged_collapse_control = {
.is_khugepaged = true,
- .last_target_node = NUMA_NO_NODE,
};
static bool hpage_collapse_scan_abort(int nid, struct collapse_control *cc)
@@ -783,16 +782,11 @@
target_node = nid;
}
- /* do some balance if several nodes have the same hit record */
- if (target_node <= cc->last_target_node)
- for (nid = cc->last_target_node + 1; nid < MAX_NUMNODES;
- nid++)
- if (max_value == cc->node_load[nid]) {
- target_node = nid;
- break;
- }
+ for_each_online_node(nid) {
+ if (max_value == cc->node_load[nid])
+ node_set(nid, cc->alloc_nmask);
+ }
- cc->last_target_node = target_node;
return target_node;
}
#else
@@ -802,9 +796,10 @@
}
#endif
-static bool hpage_collapse_alloc_page(struct page **hpage, gfp_t gfp, int node)
+static bool hpage_collapse_alloc_page(struct page **hpage, gfp_t gfp, int node,
+ nodemask_t *nmask)
{
- *hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER);
+ *hpage = __alloc_pages(gfp, HPAGE_PMD_ORDER, node, nmask);
if (unlikely(!*hpage)) {
count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
return false;
@@ -955,12 +950,11 @@
static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm,
struct collapse_control *cc)
{
- /* Only allocate from the target node */
gfp_t gfp = (cc->is_khugepaged ? alloc_hugepage_khugepaged_gfpmask() :
- GFP_TRANSHUGE) | __GFP_THISNODE;
+ GFP_TRANSHUGE);
int node = hpage_collapse_find_target_node(cc);
- if (!hpage_collapse_alloc_page(hpage, gfp, node))
+ if (!hpage_collapse_alloc_page(hpage, gfp, node, &cc->alloc_nmask))
return SCAN_ALLOC_HUGE_PAGE_FAIL;
if (unlikely(mem_cgroup_charge(page_folio(*hpage), mm, gfp)))
return SCAN_CGROUP_CHARGE_FAIL;
@@ -1144,6 +1138,7 @@
goto out;
memset(cc->node_load, 0, sizeof(cc->node_load));
+ nodes_clear(cc->alloc_nmask);
pte = pte_offset_map_lock(mm, pmd, address, &ptl);
for (_address = address, _pte = pte; _pte < pte + HPAGE_PMD_NR;
_pte++, _address += PAGE_SIZE) {
@@ -2077,6 +2072,7 @@
present = 0;
swap = 0;
memset(cc->node_load, 0, sizeof(cc->node_load));
+ nodes_clear(cc->alloc_nmask);
rcu_read_lock();
xas_for_each(&xas, page, start + HPAGE_PMD_NR - 1) {
if (xas_retry(&xas, page))
@@ -2157,8 +2153,7 @@
}
}
- trace_mm_khugepaged_scan_file(mm, page, file->f_path.dentry->d_iname,
- present, swap, result);
+ trace_mm_khugepaged_scan_file(mm, page, file, present, swap, result);
return result;
}
#else
@@ -2576,7 +2571,6 @@
if (!cc)
return -ENOMEM;
cc->is_khugepaged = false;
- cc->last_target_node = NUMA_NO_NODE;
mmgrab(mm);
lru_add_drain_all();
@@ -2602,6 +2596,7 @@
}
mmap_assert_locked(mm);
memset(cc->node_load, 0, sizeof(cc->node_load));
+ nodes_clear(cc->alloc_nmask);
if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) {
struct file *file = get_file(vma->vm_file);
pgoff_t pgoff = linear_page_index(vma, addr);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2d8549a..a1a35c1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3026,7 +3026,7 @@
{
struct obj_cgroup *objcg;
- if (!memcg_kmem_enabled() || memcg_kmem_bypass())
+ if (!memcg_kmem_enabled())
return NULL;
if (PageMemcgKmem(page)) {
diff --git a/mm/memory.c b/mm/memory.c
index f88c351..8a6d5c8 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3763,7 +3763,7 @@
*/
get_page(vmf->page);
pte_unmap_unlock(vmf->pte, vmf->ptl);
- vmf->page->pgmap->ops->migrate_to_ram(vmf);
+ ret = vmf->page->pgmap->ops->migrate_to_ram(vmf);
put_page(vmf->page);
} else if (is_hwpoison_entry(entry)) {
ret = VM_FAULT_HWPOISON;
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 6fa682e..721b236 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -357,7 +357,8 @@
}
/*
- * Unmaps pages for migration. Returns number of unmapped pages.
+ * Unmaps pages for migration. Returns number of source pfns marked as
+ * migrating.
*/
static unsigned long migrate_device_unmap(unsigned long *src_pfns,
unsigned long npages,
@@ -373,8 +374,11 @@
struct page *page = migrate_pfn_to_page(src_pfns[i]);
struct folio *folio;
- if (!page)
+ if (!page) {
+ if (src_pfns[i] & MIGRATE_PFN_MIGRATE)
+ unmapped++;
continue;
+ }
/* ZONE_DEVICE pages are not on LRU */
if (!is_zone_device_page(page)) {
diff --git a/mm/mmap.c b/mm/mmap.c
index c3c5c1d..74a84eb 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -456,7 +456,7 @@
* vma_mas_szero() - Set a given range to zero. Used when modifying a
* vm_area_struct start or end.
*
- * @mm: The struct_mm
+ * @mas: The maple tree ma_state
* @start: The start address to zero
* @end: The end address to zero.
*/
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 218b28e..6e60657 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3887,6 +3887,8 @@
static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
{
+ int flags = 0;
+
if (order < fail_page_alloc.min_order)
return false;
if (gfp_mask & __GFP_NOFAIL)
@@ -3897,10 +3899,11 @@
(gfp_mask & __GFP_DIRECT_RECLAIM))
return false;
+ /* See comment in __should_failslab() */
if (gfp_mask & __GFP_NOWARN)
- fail_page_alloc.attr.no_warn = true;
+ flags |= FAULT_NOWARN;
- return should_fail(&fail_page_alloc.attr, 1 << order);
+ return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags);
}
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
diff --git a/mm/page_ext.c b/mm/page_ext.c
index affe8024..ddf1968 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -166,7 +166,7 @@
/**
* page_ext_put() - Working with page extended information is done.
- * @page_ext - Page extended information received from page_ext_get().
+ * @page_ext: Page extended information received from page_ext_get().
*
* The page extended information of the page may not be valid after this
* function is called.
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 5fc1237..72e481a 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -973,23 +973,23 @@
scan:
spin_unlock(&si->lock);
while (++offset <= READ_ONCE(si->highest_bit)) {
- if (swap_offset_available_and_locked(si, offset))
- goto checks;
if (unlikely(--latency_ration < 0)) {
cond_resched();
latency_ration = LATENCY_LIMIT;
scanned_many = true;
}
+ if (swap_offset_available_and_locked(si, offset))
+ goto checks;
}
offset = si->lowest_bit;
while (offset < scan_base) {
- if (swap_offset_available_and_locked(si, offset))
- goto checks;
if (unlikely(--latency_ration < 0)) {
cond_resched();
latency_ration = LATENCY_LIMIT;
scanned_many = true;
}
+ if (swap_offset_available_and_locked(si, offset))
+ goto checks;
offset++;
}
spin_lock(&si->lock);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 04d8b88..026199c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2514,8 +2514,20 @@
* the flushers simply cannot keep up with the allocation
* rate. Nudge the flusher threads in case they are asleep.
*/
- if (stat.nr_unqueued_dirty == nr_taken)
+ if (stat.nr_unqueued_dirty == nr_taken) {
wakeup_flusher_threads(WB_REASON_VMSCAN);
+ /*
+ * For cgroupv1 dirty throttling is achieved by waking up
+ * the kernel flusher here and later waiting on folios
+ * which are in writeback to finish (see shrink_folio_list()).
+ *
+ * Flusher may not be able to issue writeback quickly
+ * enough for cgroupv1 writeback throttling to work
+ * on a large system.
+ */
+ if (!writeback_throttling_sane(sc))
+ reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);
+ }
sc->nr.dirty += stat.nr_dirty;
sc->nr.congested += stat.nr_congested;
@@ -4971,10 +4983,13 @@
int scanned;
int reclaimed;
LIST_HEAD(list);
+ LIST_HEAD(clean);
struct folio *folio;
+ struct folio *next;
enum vm_event_item item;
struct reclaim_stat stat;
struct lru_gen_mm_walk *walk;
+ bool skip_retry = false;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
@@ -4991,20 +5006,37 @@
if (list_empty(&list))
return scanned;
-
+retry:
reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false);
+ sc->nr_reclaimed += reclaimed;
- list_for_each_entry(folio, &list, lru) {
- /* restore LRU_REFS_FLAGS cleared by isolate_folio() */
- if (folio_test_workingset(folio))
- folio_set_referenced(folio);
+ list_for_each_entry_safe_reverse(folio, next, &list, lru) {
+ if (!folio_evictable(folio)) {
+ list_del(&folio->lru);
+ folio_putback_lru(folio);
+ continue;
+ }
- /* don't add rejected pages to the oldest generation */
if (folio_test_reclaim(folio) &&
- (folio_test_dirty(folio) || folio_test_writeback(folio)))
- folio_clear_active(folio);
- else
- folio_set_active(folio);
+ (folio_test_dirty(folio) || folio_test_writeback(folio))) {
+ /* restore LRU_REFS_FLAGS cleared by isolate_folio() */
+ if (folio_test_workingset(folio))
+ folio_set_referenced(folio);
+ continue;
+ }
+
+ if (skip_retry || folio_test_active(folio) || folio_test_referenced(folio) ||
+ folio_mapped(folio) || folio_test_locked(folio) ||
+ folio_test_dirty(folio) || folio_test_writeback(folio)) {
+ /* don't add rejected folios to the oldest generation */
+ set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS,
+ BIT(PG_active));
+ continue;
+ }
+
+ /* retry folios that may have missed folio_rotate_reclaimable() */
+ list_move(&folio->lru, &clean);
+ sc->nr_scanned -= folio_nr_pages(folio);
}
spin_lock_irq(&lruvec->lru_lock);
@@ -5026,7 +5058,13 @@
mem_cgroup_uncharge_list(&list);
free_unref_page_list(&list);
- sc->nr_reclaimed += reclaimed;
+ INIT_LIST_HEAD(&list);
+ list_splice_init(&clean, &list);
+
+ if (!list_empty(&list)) {
+ skip_retry = true;
+ goto retry;
+ }
if (need_swapping && type == LRU_GEN_ANON)
*need_swapping = true;
@@ -5844,8 +5882,8 @@
enum lru_list lru;
unsigned long nr_reclaimed = 0;
unsigned long nr_to_reclaim = sc->nr_to_reclaim;
+ bool proportional_reclaim;
struct blk_plug plug;
- bool scan_adjusted;
if (lru_gen_enabled()) {
lru_gen_shrink_lruvec(lruvec, sc);
@@ -5868,8 +5906,8 @@
* abort proportional reclaim if either the file or anon lru has already
* dropped to zero at the first pass.
*/
- scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
- sc->priority == DEF_PRIORITY);
+ proportional_reclaim = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
+ sc->priority == DEF_PRIORITY);
blk_start_plug(&plug);
while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -5889,7 +5927,7 @@
cond_resched();
- if (nr_reclaimed < nr_to_reclaim || scan_adjusted)
+ if (nr_reclaimed < nr_to_reclaim || proportional_reclaim)
continue;
/*
@@ -5940,8 +5978,6 @@
nr_scanned = targets[lru] - nr[lru];
nr[lru] = targets[lru] * (100 - percentage) / 100;
nr[lru] -= min(nr[lru], nr_scanned);
-
- scan_adjusted = true;
}
blk_finish_plug(&plug);
sc->nr_reclaimed += nr_reclaimed;