blob: 81f3d9f41f4348fb6426a109657ebda1fafc58e5 [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0+
#include <linux/ctype.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/swapfile.h>
#include <linux/debugfs.h>
#include <linux/mm_metrics.h>
/*
 * make sure swapout timestamp won't wrap around within a year
 *
 * mm_metrics_init() only enables swap refault metrics when the current time
 * plus this many seconds still fits in SWP_TIME_BITS bits.
 */
#define SECONDS_PER_YEAR (60 * 60 * 24 * 365)
/* max number of buckets for histogram */
#define MAX_HISTOGRAM_SIZE 100
/*
 * max number of digits in decimal for threshold plus one space
 *
 * U64_MAX has 20 decimal digits. Together with MAX_HISTOGRAM_SIZE this bounds
 * the length of a write accepted by mm_metrics_write().
 */
#define MAX_CHARS_PER_THRESHOLD (20 + 1)
/*
 * Set once by mm_metrics_init() when all of its CONFIG_SWAP checks pass.
 * While false, the "swap_refault" debugfs file is not created.
 */
bool swap_refault_enabled __read_mostly;
/*
 * One histogram pointer per metric type. A NULL entry is reported as
 * "disabled" by mm_metrics_read(). Entries are read with rcu_dereference()
 * and replaced in mm_metrics_write() while holding histogram_lock.
 */
struct histogram __rcu *mm_metrics_files[NR_MM_METRICS];
/*
 * debugfs file names, indexed by metric type. mm_metrics_init() has a
 * BUILD_BUG_ON() requiring exactly NR_MM_METRICS entries.
 */
static const char *const mm_metrics_names[] = {
"swap_refault",
"swap_latency",
"reclaim_latency",
};
/* held in mm_metrics_write() while an mm_metrics_files[] entry is swapped */
static DEFINE_SPINLOCK(histogram_lock);
/*
 * histogram_alloc - allocate a histogram and copy in its thresholds
 * @thresholds: strictly increasing bucket upper bounds; the last entry is
 *              expected to be U64_MAX (both enforced with VM_BUG_ON)
 * @size: number of entries in @thresholds, 1..MAX_HISTOGRAM_SIZE
 *
 * Both allocations use GFP_ATOMIC; mm_metrics_write() calls this from inside
 * an RCU read-side critical section.
 *
 * Returns the new histogram, or ERR_PTR(-ENOMEM) if either allocation fails.
 */
static struct histogram *histogram_alloc(const u64 *thresholds,
					 unsigned int size)
{
	struct histogram *hist;
	int bytes;
	int idx;

	VM_BUG_ON(!size || size > MAX_HISTOGRAM_SIZE);

	/* the struct itself plus room for @size thresholds */
	bytes = sizeof(struct histogram) + size * sizeof(*hist->thresholds);
	hist = kmalloc(bytes, GFP_ATOMIC);
	if (!hist)
		return ERR_PTR(-ENOMEM);

	/* @size counters in per-cpu memory */
	bytes = size * sizeof(*hist->buckets);
	hist->buckets = __alloc_percpu_gfp(bytes,
					   __alignof__(*hist->buckets),
					   GFP_ATOMIC);
	if (!hist->buckets) {
		kfree(hist);
		return ERR_PTR(-ENOMEM);
	}

	hist->size = size;

	for (idx = 0; idx < size; idx++) {
		/* every threshold must be greater than its predecessor */
		VM_BUG_ON(idx && thresholds[idx - 1] >= thresholds[idx]);
		hist->thresholds[idx] = thresholds[idx];
	}
	/* the final bucket must catch every remaining value */
	VM_BUG_ON(thresholds[idx - 1] != U64_MAX);

	return hist;
}
/*
 * histogram_create - build a histogram from a textual list of thresholds
 * @buf: NUL-terminated, whitespace-separated list of unsigned integers
 *       (base auto-detected: decimal, 0x hex, 0 octal). The buffer is
 *       modified in place: separators are overwritten with NUL bytes.
 *
 * The thresholds must be strictly increasing. If the last one is not U64_MAX,
 * U64_MAX is appended so that the final bucket catches every value. At most
 * MAX_HISTOGRAM_SIZE thresholds (including an appended U64_MAX) are allowed.
 *
 * Each token is parsed with kstrtoull() rather than simple_strtoull(), so a
 * value that does not fit in 64 bits is rejected instead of silently wrapping
 * to a smaller number.
 *
 * Returns the new histogram or an ERR_PTR():
 *   -EINVAL  empty input, malformed number, or thresholds not increasing
 *   -ERANGE  a number does not fit in 64 bits
 *   -E2BIG   too many thresholds
 *   -ENOMEM  allocation failure
 */
static struct histogram *histogram_create(char *buf)
{
	unsigned int i;
	unsigned int size = 0;
	u64 *thresholds;
	struct histogram *hist;

	if (!*buf)
		return ERR_PTR(-EINVAL);

	thresholds = kmalloc_array(MAX_HISTOGRAM_SIZE, sizeof(*thresholds),
				   GFP_KERNEL);
	if (!thresholds)
		return ERR_PTR(-ENOMEM);

	while (*buf) {
		unsigned long long val;
		char *token = buf;
		int err;

		if (size == MAX_HISTOGRAM_SIZE) {
			hist = ERR_PTR(-E2BIG);
			goto failed;
		}

		/* terminate the current token and step to the next one */
		while (*buf && !isspace(*buf))
			buf++;
		if (*buf) {
			*buf++ = '\0';
			while (isspace(*buf))
				buf++;
		}

		/* reports both malformed input and 64-bit overflow */
		err = kstrtoull(token, 0, &val);
		if (err) {
			hist = ERR_PTR(err);
			goto failed;
		}
		thresholds[size++] = val;
	}

	/* the last threshold must be U64_MAX, add it if missing */
	if (thresholds[size - 1] != U64_MAX) {
		if (size == MAX_HISTOGRAM_SIZE) {
			hist = ERR_PTR(-E2BIG);
			goto failed;
		}
		thresholds[size++] = U64_MAX;
	}

	/* reject lists that are not strictly increasing */
	for (i = 1; i < size; i++) {
		if (thresholds[i - 1] >= thresholds[i]) {
			hist = ERR_PTR(-EINVAL);
			goto failed;
		}
	}

	hist = histogram_alloc(thresholds, size);
failed:
	/* histogram_alloc() copies the thresholds, so the scratch array goes */
	kfree(thresholds);
	return hist;
}
/*
 * histogram_free - RCU callback that releases a retired histogram
 * @rcu: the rcu_head embedded in the histogram being freed
 *
 * Queued with call_rcu() by mm_metrics_write() after a histogram has been
 * replaced.
 */
static void histogram_free(struct rcu_head *rcu)
{
	struct histogram *retired;

	retired = container_of(rcu, struct histogram, rcu);

	VM_BUG_ON(!retired->size || retired->size > MAX_HISTOGRAM_SIZE);

	/* per-cpu counters first, then the histogram itself */
	free_percpu(retired->buckets);
	kfree(retired);
}
/*
 * mm_metrics_read - seq_file show callback for one metric file
 * @sf: seq_file whose ->private holds the metric type
 * @v: unused
 *
 * Prints "disabled" when no histogram is installed. Otherwise prints one
 * "<lower>-<upper> <count>" line per bucket, where <count> is the sum of that
 * bucket's per-cpu counters over all possible CPUs.
 *
 * Returns 0 on success or -ENOMEM if the scratch array cannot be allocated.
 */
static int mm_metrics_read(struct seq_file *sf, void *v)
{
	unsigned int type = (unsigned long)sf->private;
	struct histogram *hist;
	u64 *totals;
	int ret = 0;
	int cpu;
	int b;

	VM_BUG_ON(type >= NR_MM_METRICS);

	rcu_read_lock();

	hist = rcu_dereference(mm_metrics_files[type]);
	if (!hist) {
		seq_puts(sf, "disabled\n");
		goto unlock;
	}

	VM_BUG_ON(!hist->size || hist->size > MAX_HISTOGRAM_SIZE);

	/* allocated inside the RCU read section, hence GFP_NOWAIT */
	totals = kmalloc_array(hist->size, sizeof(*totals), GFP_NOWAIT);
	if (!totals) {
		ret = -ENOMEM;
		goto unlock;
	}
	memset(totals, 0, hist->size * sizeof(*totals));

	/* fold every CPU's counters into a single total per bucket */
	for_each_possible_cpu(cpu) {
		for (b = 0; b < hist->size; b++)
			totals[b] += per_cpu(hist->buckets[b], cpu);
	}

	for (b = 0; b < hist->size; b++) {
		u64 from = 0;
		u64 to = hist->thresholds[b];

		/* a bucket starts right after the previous threshold */
		if (b)
			from = hist->thresholds[b - 1] + 1;

		VM_BUG_ON(from > to);
		seq_printf(sf, "%llu-%llu %llu\n", from, to, totals[b]);
	}
	VM_BUG_ON(hist->thresholds[b - 1] != U64_MAX);

	kfree(totals);
unlock:
	rcu_read_unlock();
	return ret;
}
/*
 * mm_metrics_write - handle a write to one metric file
 * @file: debugfs file; its inode's ->i_private holds the metric type
 * @buf: user buffer holding a command or a list of thresholds
 * @len: number of bytes in @buf
 * @ppos: unused
 *
 * After trimming surrounding whitespace the input is interpreted as:
 *   "clear"    install a newly allocated histogram with the thresholds of
 *              the current one (nothing is allocated if none is installed)
 *   "disable"  remove the histogram
 *   otherwise  a threshold list handed to histogram_create()
 *
 * The previous histogram, if any, is freed after an RCU grace period.
 *
 * Returns @len on success, -E2BIG if the input is too long, or the error
 * from copying the user buffer or building the new histogram.
 */
static ssize_t mm_metrics_write(struct file *file, const char __user *buf,
				size_t len, loff_t *ppos)
{
	unsigned int type = (unsigned long)file_inode(file)->i_private;
	struct histogram *replacement = NULL;
	struct histogram *prev;
	char *copy, *cmd;

	VM_BUG_ON(type >= NR_MM_METRICS);

	if (len > MAX_HISTOGRAM_SIZE * MAX_CHARS_PER_THRESHOLD)
		return -E2BIG;

	copy = memdup_user_nul(buf, len);
	if (IS_ERR(copy))
		return PTR_ERR(copy);

	cmd = strim(copy);
	if (strcmp(cmd, "clear") == 0) {
		/* duplicate the thresholds of whatever is installed now */
		rcu_read_lock();
		prev = rcu_dereference(mm_metrics_files[type]);
		if (prev)
			replacement = histogram_alloc(prev->thresholds,
						      prev->size);
		rcu_read_unlock();
	} else if (strcmp(cmd, "disable") != 0) {
		replacement = histogram_create(cmd);
	}
	/* for "disable", replacement stays NULL */

	kfree(copy);

	if (IS_ERR(replacement))
		return PTR_ERR(replacement);

	/* publish the new histogram and pick up the one it replaces */
	spin_lock(&histogram_lock);
	prev = rcu_dereference_protected(mm_metrics_files[type],
					 lockdep_is_held(&histogram_lock));
	rcu_assign_pointer(mm_metrics_files[type], replacement);
	spin_unlock(&histogram_lock);

	/* readers may still be using the old one; free it after they finish */
	if (prev)
		call_rcu(&prev->rcu, histogram_free);

	return len;
}
/*
 * mm_metrics_open - open a metric file as a single-record seq_file
 *
 * Passes the inode's ->i_private (the metric type) through to
 * mm_metrics_read() as the seq_file's private data.
 */
static int mm_metrics_open(struct inode *inode, struct file *file)
{
	void *type = inode->i_private;

	return single_open(file, mm_metrics_read, type);
}
/*
 * File operations shared by every metric file: reads go through the
 * seq_file helpers, writes through mm_metrics_write().
 */
static const struct file_operations mm_metrics_ops = {
	/* lifetime */
	.open		= mm_metrics_open,
	.release	= single_release,
	/* reading via seq_file */
	.read		= seq_read,
	.llseek		= seq_lseek,
	/* reconfiguring the histogram */
	.write		= mm_metrics_write,
};
/*
 * mm_metrics_init - enable swap refault metrics if possible and create the
 * debugfs interface
 *
 * With CONFIG_SWAP, swap refault metrics are enabled only when:
 *   - SWP_TM_OFF_BITS does not exceed the bit width of swp_entry_t.val,
 *   - max_swapfile_size() is at least 2^SWP_TM_OFF_BITS, and
 *   - the current time plus SECONDS_PER_YEAR does not exceed 2^SWP_TIME_BITS.
 * Otherwise the reason is logged and the "swap_refault" file is not created.
 *
 * One file per remaining metric is created under the "mm_metrics" debugfs
 * directory, with the metric type stored as the file's private data.
 *
 * Returns 0 on success, or -ENODEV if debugfs is not initialized or any
 * directory/file creation fails (anything already created is removed).
 */
static int __init mm_metrics_init(void)
{
	int i;
	struct dentry *dent;
#ifdef CONFIG_SWAP
	unsigned long now = ktime_get_seconds();
	unsigned long size = max_swapfile_size();

	/* both values are unsigned long, so they are printed with %lu */
	if (SWP_TM_OFF_BITS > FIELD_SIZEOF(swp_entry_t, val) * BITS_PER_BYTE)
		pr_err("swap refault metrics disabled: 32-bit CPU\n");
	else if (size < GENMASK_ULL(SWP_TM_OFF_BITS - 1, 0) + 1)
		pr_err("swap refault metrics disabled: size %lu\n", size);
	else if (now + SECONDS_PER_YEAR > BIT_ULL(SWP_TIME_BITS))
		pr_err("swap refault metrics disabled: time %lu\n", now);
	else
		swap_refault_enabled = true;
#endif

	BUILD_BUG_ON(ARRAY_SIZE(mm_metrics_names) != NR_MM_METRICS);

	if (!debugfs_initialized())
		return -ENODEV;

	/*
	 * Check for both NULL and ERR_PTR, matching the file creation check
	 * below; debugfs reports failure either way depending on the kernel.
	 */
	dent = debugfs_create_dir("mm_metrics", NULL);
	if (IS_ERR_OR_NULL(dent))
		return -ENODEV;

	for (i = 0; i < NR_MM_METRICS; i++) {
		struct dentry *fent;

		if (i == MM_SWAP_REFAULT && !swap_refault_enabled)
			continue;

		fent = debugfs_create_file(mm_metrics_names[i], 0644, dent,
					   (void *)(long)i, &mm_metrics_ops);
		if (IS_ERR_OR_NULL(fent)) {
			/* do not leave a partially populated directory */
			debugfs_remove_recursive(dent);
			return -ENODEV;
		}
	}

	pr_info("memory metrics initialized\n");
	return 0;
}
subsys_initcall(mm_metrics_init);
/*
 * mm_metrics_record - add @count to the bucket that @val falls into
 * @type: metric type, must be below NR_MM_METRICS
 * @val: value to classify
 * @count: amount added to the selected bucket's counter on this CPU
 *
 * Does nothing when no histogram is installed for @type. Otherwise a binary
 * search picks the first bucket whose threshold is not below @val.
 */
void mm_metrics_record(unsigned int type, u64 val, u64 count)
{
	struct histogram *hist;
	int lo, hi;

	VM_BUG_ON(type >= NR_MM_METRICS);

	rcu_read_lock();

	hist = rcu_dereference(mm_metrics_files[type]);
	if (hist) {
		VM_BUG_ON(!hist->size || hist->size > MAX_HISTOGRAM_SIZE);

		/* narrow [lo, hi] until it holds a single bucket index */
		for (lo = 0, hi = hist->size - 1; lo < hi; ) {
			int mid = (lo + hi) >> 1;

			if (val > hist->thresholds[mid])
				lo = mid + 1;
			else
				hi = mid;
		}

		this_cpu_add(hist->buckets[hi], count);
	}

	rcu_read_unlock();
}