merge from branch/tag: krsi/krsi-backports-r2 into branch: cos-5.10
Changelog:
-------------------------------------------------------------
Brendan Jackman (1):
LAKITU: Add stub IDs for some BPF helpers that exist upstream
Florent Revest (7):
UPSTREAM: bpf: Factorize bpf_trace_printk and bpf_seq_printf
UPSTREAM: bpf: Add a ARG_PTR_TO_CONST_STR argument type
UPSTREAM: bpf: Lock bpf_trace_printk's tmp buf before it is written to
UPSTREAM: seq_file: Add a seq_bprintf function
UPSTREAM: bpf: Add a bpf_snprintf helper
UPSTREAM: bpf: Implement formatted output helpers with bstr_printf
UPSTREAM: bpf: Fix nested bpf_bprintf_prepare with more per-cpu buffers
Song Liu (1):
UPSTREAM: bpf: Enable task local storage for tracing programs
BUG=b/217368493
TEST=tryjob, validation
RELEASE_NOTE=None
Signed-off-by: Oleksandr Tymoshenko <ovt@google.com>
Change-Id: I8099202bac7bf6e8fc6b669e2b2ec2cabbfcdb0e
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 4727147..ad2120d 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -415,6 +415,24 @@ void seq_printf(struct seq_file *m, const char *f, ...)
}
EXPORT_SYMBOL(seq_printf);
+#ifdef CONFIG_BINARY_PRINTF
+void seq_bprintf(struct seq_file *m, const char *f, const u32 *binary)
+{
+ int len;
+
+ if (m->count < m->size) {
+ len = bstr_printf(m->buf + m->count, m->size - m->count, f,
+ binary);
+ if (m->count + len < m->size) {
+ m->count += len;
+ return;
+ }
+ }
+ seq_set_overflow(m);
+}
+EXPORT_SYMBOL(seq_bprintf);
+#endif /* CONFIG_BINARY_PRINTF */
+
/**
* mangle_path - mangle and copy path to buffer beginning
* @s: buffer start
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0fa91ee..b02f76e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -295,6 +295,7 @@ enum bpf_arg_type {
ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */
+ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */
__BPF_ARG_TYPE_MAX,
};
@@ -1486,6 +1487,7 @@ struct bpf_prog *bpf_prog_by_id(u32 id);
struct bpf_link *bpf_link_by_id(u32 id);
const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
+void bpf_task_storage_free(struct task_struct *task);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@@ -1680,6 +1682,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
{
return NULL;
}
+
+static inline void bpf_task_storage_free(struct task_struct *task)
+{
+}
#endif /* CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
@@ -1874,10 +1880,13 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
extern const struct bpf_func_proto bpf_copy_from_user_proto;
extern const struct bpf_func_proto bpf_snprintf_btf_proto;
+extern const struct bpf_func_proto bpf_snprintf_proto;
extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto;
+extern const struct bpf_func_proto bpf_task_storage_get_proto;
+extern const struct bpf_func_proto bpf_task_storage_delete_proto;
const struct bpf_func_proto *bpf_tracing_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
@@ -1995,4 +2004,8 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
struct btf_id_set;
bool btf_id_set_contains(const struct btf_id_set *set, u32 id);
+int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
+ u32 **bin_buf, u32 num_args);
+void bpf_bprintf_cleanup(void);
+
#endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index 0d1c33a..479c101 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -38,21 +38,9 @@ static inline struct bpf_storage_blob *bpf_inode(
return inode->i_security + bpf_lsm_blob_sizes.lbs_inode;
}
-static inline struct bpf_storage_blob *bpf_task(
- const struct task_struct *task)
-{
- if (unlikely(!task->security))
- return NULL;
-
- return task->security + bpf_lsm_blob_sizes.lbs_task;
-}
-
extern const struct bpf_func_proto bpf_inode_storage_get_proto;
extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
-extern const struct bpf_func_proto bpf_task_storage_get_proto;
-extern const struct bpf_func_proto bpf_task_storage_delete_proto;
void bpf_inode_storage_free(struct inode *inode);
-void bpf_task_storage_free(struct task_struct *task);
#else /* !CONFIG_BPF_LSM */
@@ -73,20 +61,10 @@ static inline struct bpf_storage_blob *bpf_inode(
return NULL;
}
-static inline struct bpf_storage_blob *bpf_task(
- const struct task_struct *task)
-{
- return NULL;
-}
-
static inline void bpf_inode_storage_free(struct inode *inode)
{
}
-static inline void bpf_task_storage_free(struct task_struct *task)
-{
-}
-
#endif /* CONFIG_BPF_LSM */
#endif /* _LINUX_BPF_LSM_H */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index e256d6e..ef53fad 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -109,8 +109,8 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
#endif
#ifdef CONFIG_BPF_LSM
BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
#endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
#if defined(CONFIG_XDP_SOCKETS)
BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0acf0c7..1586e82 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -39,6 +39,7 @@
struct backing_dev_info;
struct bio_list;
struct blk_plug;
+struct bpf_local_storage;
struct capture_control;
struct cfs_rq;
struct fs_struct;
@@ -1335,6 +1336,10 @@ struct task_struct {
/* Used by LSM modules for access restriction: */
void *security;
#endif
+#ifdef CONFIG_BPF_SYSCALL
+ /* Used by BPF task local storage */
+ struct bpf_local_storage __rcu *bpf_storage;
+#endif
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
unsigned long lowest_stack;
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 662a8cf..6b80d0a 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -146,6 +146,10 @@ void *__seq_open_private(struct file *, const struct seq_operations *, int);
int seq_open_private(struct file *, const struct seq_operations *, int);
int seq_release_private(struct inode *, struct file *);
+#ifdef CONFIG_BINARY_PRINTF
+void seq_bprintf(struct seq_file *m, const char *f, const u32 *binary);
+#endif
+
#define DEFINE_SEQ_ATTRIBUTE(__name) \
static int __name ## _open(struct inode *inode, struct file *file) \
{ \
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9880f34..7421afb 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3831,6 +3831,33 @@ union bpf_attr {
* The **hash_algo** is returned on success,
* **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if
* invalid arguments are passed.
+ *
+ * long bpf_snprintf(char *str, u32 str_size, const char *fmt, u64 *data, u32 data_len)
+ * Description
+ * Outputs a string into the **str** buffer of size **str_size**
+ * based on a format string stored in a read-only map pointed by
+ * **fmt**.
+ *
+ * Each format specifier in **fmt** corresponds to one u64 element
+ * in the **data** array. For strings and pointers where pointees
+ * are accessed, only the pointer values are stored in the *data*
+ * array. The *data_len* is the size of *data* in bytes.
+ *
+ * Formats **%s** and **%p{i,I}{4,6}** require to read kernel
+ * memory. Reading kernel memory may fail due to either invalid
+ * address or valid address but requiring a major memory fault. If
+ * reading kernel memory fails, the string for **%s** will be an
+ * empty string, and the ip address for **%p{i,I}{4,6}** will be 0.
+ * Not returning error to bpf program is consistent with what
+ * **bpf_trace_printk**\ () does for now.
+ *
+ * Return
+ * The strictly positive length of the formatted string, including
+ * the trailing zero character. If the return value is greater than
+ * **str_size**, **str** contains a truncated string, guaranteed to
+ * be zero-terminated except when **str_size** is 0.
+ *
+ * Or **-EBUSY** if the per-CPU memory copy buffer is busy.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3995,6 +4022,10 @@ union bpf_attr {
FN(bprm_opts_set), \
FN(ktime_get_coarse_ns), \
FN(ima_inode_hash), \
+ FN(sock_from_file), /* LAKITU: backport stub, unimplemented */ \
+ FN(check_mtu), /* LAKITU: backport stub, unimplemented */ \
+ FN(for_each_map_elem), /* LAKITU: backport stub, unimplemented */ \
+ FN(snprintf), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/init/Kconfig b/init/Kconfig
index 13685bf..8e25fce 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1703,6 +1703,7 @@
select BPF
select IRQ_WORK
select TASKS_TRACE_RCU
+ select BINARY_PRINTF
default n
help
Enable the bpf() system call that allows to manipulate eBPF
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index d124934..7f33098 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -9,8 +9,8 @@
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
+obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
-obj-${CONFIG_BPF_LSM} += bpf_task_storage.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
obj-$(CONFIG_BPF_JIT) += trampoline.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o
@@ -18,7 +18,6 @@
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
-obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o
obj-$(CONFIG_BPF_SYSCALL) += offload.o
obj-$(CONFIG_BPF_SYSCALL) += net_namespace.o
endif
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 5d3a7af9..45bcc2b 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -139,17 +139,18 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
{
struct bpf_local_storage *local_storage;
bool free_local_storage = false;
+ unsigned long flags;
if (unlikely(!selem_linked_to_storage(selem)))
/* selem has already been unlinked from sk */
return;
local_storage = rcu_dereference(selem->local_storage);
- raw_spin_lock_bh(&local_storage->lock);
+ raw_spin_lock_irqsave(&local_storage->lock, flags);
if (likely(selem_linked_to_storage(selem)))
free_local_storage = bpf_selem_unlink_storage_nolock(
local_storage, selem, true);
- raw_spin_unlock_bh(&local_storage->lock);
+ raw_spin_unlock_irqrestore(&local_storage->lock, flags);
if (free_local_storage)
kfree_rcu(local_storage, rcu);
@@ -166,6 +167,7 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
{
struct bpf_local_storage_map *smap;
struct bpf_local_storage_map_bucket *b;
+ unsigned long flags;
if (unlikely(!selem_linked_to_map(selem)))
/* selem has already be unlinked from smap */
@@ -173,21 +175,22 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
smap = rcu_dereference(SDATA(selem)->smap);
b = select_bucket(smap, selem);
- raw_spin_lock_bh(&b->lock);
+ raw_spin_lock_irqsave(&b->lock, flags);
if (likely(selem_linked_to_map(selem)))
hlist_del_init_rcu(&selem->map_node);
- raw_spin_unlock_bh(&b->lock);
+ raw_spin_unlock_irqrestore(&b->lock, flags);
}
void bpf_selem_link_map(struct bpf_local_storage_map *smap,
struct bpf_local_storage_elem *selem)
{
struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);
+ unsigned long flags;
- raw_spin_lock_bh(&b->lock);
+ raw_spin_lock_irqsave(&b->lock, flags);
RCU_INIT_POINTER(SDATA(selem)->smap, smap);
hlist_add_head_rcu(&selem->map_node, &b->list);
- raw_spin_unlock_bh(&b->lock);
+ raw_spin_unlock_irqrestore(&b->lock, flags);
}
void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
@@ -223,16 +226,18 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
sdata = SDATA(selem);
if (cacheit_lockit) {
+ unsigned long flags;
+
/* spinlock is needed to avoid racing with the
* parallel delete. Otherwise, publishing an already
* deleted sdata to the cache will become a use-after-free
* problem in the next bpf_local_storage_lookup().
*/
- raw_spin_lock_bh(&local_storage->lock);
+ raw_spin_lock_irqsave(&local_storage->lock, flags);
if (selem_linked_to_storage(selem))
rcu_assign_pointer(local_storage->cache[smap->cache_idx],
sdata);
- raw_spin_unlock_bh(&local_storage->lock);
+ raw_spin_unlock_irqrestore(&local_storage->lock, flags);
}
return sdata;
@@ -325,6 +330,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
struct bpf_local_storage_data *old_sdata = NULL;
struct bpf_local_storage_elem *selem;
struct bpf_local_storage *local_storage;
+ unsigned long flags;
int err;
/* BPF_EXIST and BPF_NOEXIST cannot be both set */
@@ -372,7 +378,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
}
}
- raw_spin_lock_bh(&local_storage->lock);
+ raw_spin_lock_irqsave(&local_storage->lock, flags);
/* Recheck local_storage->list under local_storage->lock */
if (unlikely(hlist_empty(&local_storage->list))) {
@@ -426,11 +432,11 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
}
unlock:
- raw_spin_unlock_bh(&local_storage->lock);
+ raw_spin_unlock_irqrestore(&local_storage->lock, flags);
return SDATA(selem);
unlock_err:
- raw_spin_unlock_bh(&local_storage->lock);
+ raw_spin_unlock_irqrestore(&local_storage->lock, flags);
return ERR_PTR(err);
}
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 1622a44..9829f38 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -115,10 +115,6 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_spin_lock_proto;
case BPF_FUNC_spin_unlock:
return &bpf_spin_unlock_proto;
- case BPF_FUNC_task_storage_get:
- return &bpf_task_storage_get_proto;
- case BPF_FUNC_task_storage_delete:
- return &bpf_task_storage_delete_proto;
case BPF_FUNC_bprm_opts_set:
return &bpf_bprm_opts_set_proto;
case BPF_FUNC_ima_inode_hash:
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index e0da025..baf3566 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -15,7 +15,6 @@
#include <linux/bpf_local_storage.h>
#include <linux/filter.h>
#include <uapi/linux/btf.h>
-#include <linux/bpf_lsm.h>
#include <linux/btf_ids.h>
#include <linux/fdtable.h>
@@ -24,12 +23,8 @@ DEFINE_BPF_STORAGE_CACHE(task_cache);
static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
{
struct task_struct *task = owner;
- struct bpf_storage_blob *bsb;
- bsb = bpf_task(task);
- if (!bsb)
- return NULL;
- return &bsb->storage;
+ return &task->bpf_storage;
}
static struct bpf_local_storage_data *
@@ -38,13 +33,8 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map,
{
struct bpf_local_storage *task_storage;
struct bpf_local_storage_map *smap;
- struct bpf_storage_blob *bsb;
- bsb = bpf_task(task);
- if (!bsb)
- return NULL;
-
- task_storage = rcu_dereference(bsb->storage);
+ task_storage = rcu_dereference(task->bpf_storage);
if (!task_storage)
return NULL;
@@ -57,16 +47,12 @@ void bpf_task_storage_free(struct task_struct *task)
struct bpf_local_storage_elem *selem;
struct bpf_local_storage *local_storage;
bool free_task_storage = false;
- struct bpf_storage_blob *bsb;
struct hlist_node *n;
-
- bsb = bpf_task(task);
- if (!bsb)
- return;
+ unsigned long flags;
rcu_read_lock();
- local_storage = rcu_dereference(bsb->storage);
+ local_storage = rcu_dereference(task->bpf_storage);
if (!local_storage) {
rcu_read_unlock();
return;
@@ -81,7 +67,7 @@ void bpf_task_storage_free(struct task_struct *task)
* when unlinking elem from the local_storage->list and
* the map's bucket->list.
*/
- raw_spin_lock_bh(&local_storage->lock);
+ raw_spin_lock_irqsave(&local_storage->lock, flags);
hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
/* Always unlink from map before unlinking from
* local_storage.
@@ -90,7 +76,7 @@ void bpf_task_storage_free(struct task_struct *task)
free_task_storage = bpf_selem_unlink_storage_nolock(
local_storage, selem, false);
}
- raw_spin_unlock_bh(&local_storage->lock);
+ raw_spin_unlock_irqrestore(&local_storage->lock, flags);
rcu_read_unlock();
/* free_task_storage should always be true as long as
@@ -150,7 +136,7 @@ static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
*/
WARN_ON_ONCE(!rcu_read_lock_held());
task = pid_task(pid, PIDTYPE_PID);
- if (!task || !task_storage_ptr(task)) {
+ if (!task) {
err = -ENOENT;
goto out;
}
@@ -213,23 +199,16 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
return (unsigned long)NULL;
- /* explicitly check that the task_storage_ptr is not
- * NULL as task_storage_lookup returns NULL in this case and
- * bpf_local_storage_update expects the owner to have a
- * valid storage pointer.
- */
- if (!task || !task_storage_ptr(task))
+ if (!task)
return (unsigned long)NULL;
sdata = task_storage_lookup(task, map, true);
if (sdata)
return (unsigned long)sdata->data;
- /* This helper must only be called from places where the lifetime of the task
- * is guaranteed. Either by being refcounted or by being protected
- * by an RCU read-side critical section.
- */
- if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
+ /* only allocate new storage, when the task is refcounted */
+ if (refcount_read(&task->usage) &&
+ (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) {
sdata = bpf_local_storage_update(
task, (struct bpf_local_storage_map *)map, value,
BPF_NOEXIST);
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index c18b88b..5636cbd 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -670,6 +670,323 @@ const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
.arg1_type = ARG_PTR_TO_PERCPU_BTF_ID,
};
+static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
+ size_t bufsz)
+{
+ void __user *user_ptr = (__force void __user *)unsafe_ptr;
+
+ buf[0] = 0;
+
+ switch (fmt_ptype) {
+ case 's':
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+ if ((unsigned long)unsafe_ptr < TASK_SIZE)
+ return strncpy_from_user_nofault(buf, user_ptr, bufsz);
+ fallthrough;
+#endif
+ case 'k':
+ return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
+ case 'u':
+ return strncpy_from_user_nofault(buf, user_ptr, bufsz);
+ }
+
+ return -EINVAL;
+}
+
+/* Per-cpu temp buffers which can be used by printf-like helpers for %s or %p
+ */
+#define MAX_PRINTF_BUF_LEN 512
+
+/* Support executing three nested bprintf helper calls on a given CPU */
+struct bpf_bprintf_buffers {
+ char tmp_bufs[3][MAX_PRINTF_BUF_LEN];
+};
+static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs);
+static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);
+
+static int try_get_fmt_tmp_buf(char **tmp_buf)
+{
+ struct bpf_bprintf_buffers *bufs;
+ int nest_level;
+
+ preempt_disable();
+ nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
+ if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bufs->tmp_bufs))) {
+ this_cpu_dec(bpf_bprintf_nest_level);
+ preempt_enable();
+ return -EBUSY;
+ }
+ bufs = this_cpu_ptr(&bpf_bprintf_bufs);
+ *tmp_buf = bufs->tmp_bufs[nest_level - 1];
+
+ return 0;
+}
+
+void bpf_bprintf_cleanup(void)
+{
+ if (this_cpu_read(bpf_bprintf_nest_level)) {
+ this_cpu_dec(bpf_bprintf_nest_level);
+ preempt_enable();
+ }
+}
+
+/*
+ * bpf_bprintf_prepare - Generic pass on format strings for bprintf-like helpers
+ *
+ * Returns a negative value if fmt is an invalid format string or 0 otherwise.
+ *
+ * This can be used in two ways:
+ * - Format string verification only: when bin_args is NULL
+ * - Arguments preparation: in addition to the above verification, it writes in
+ * bin_args a binary representation of arguments usable by bstr_printf where
+ * pointers from BPF have been sanitized.
+ *
+ * In argument preparation mode, if 0 is returned, safe temporary buffers are
+ * allocated and bpf_bprintf_cleanup should be called to free them after use.
+ */
+int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
+ u32 **bin_args, u32 num_args)
+{
+ char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end;
+ size_t sizeof_cur_arg, sizeof_cur_ip;
+ int err, i, num_spec = 0;
+ u64 cur_arg;
+ char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX";
+
+ fmt_end = strnchr(fmt, fmt_size, 0);
+ if (!fmt_end)
+ return -EINVAL;
+ fmt_size = fmt_end - fmt;
+
+ if (bin_args) {
+ if (num_args && try_get_fmt_tmp_buf(&tmp_buf))
+ return -EBUSY;
+
+ tmp_buf_end = tmp_buf + MAX_PRINTF_BUF_LEN;
+ *bin_args = (u32 *)tmp_buf;
+ }
+
+ for (i = 0; i < fmt_size; i++) {
+ if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (fmt[i] != '%')
+ continue;
+
+ if (fmt[i + 1] == '%') {
+ i++;
+ continue;
+ }
+
+ if (num_spec >= num_args) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* The string is zero-terminated so if fmt[i] != 0, we can
+ * always access fmt[i + 1], in the worst case it will be a 0
+ */
+ i++;
+
+ /* skip optional "[0 +-][num]" width formatting field */
+ while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' ||
+ fmt[i] == ' ')
+ i++;
+ if (fmt[i] >= '1' && fmt[i] <= '9') {
+ i++;
+ while (fmt[i] >= '0' && fmt[i] <= '9')
+ i++;
+ }
+
+ if (fmt[i] == 'p') {
+ sizeof_cur_arg = sizeof(long);
+
+ if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
+ fmt[i + 2] == 's') {
+ fmt_ptype = fmt[i + 1];
+ i += 2;
+ goto fmt_str;
+ }
+
+ if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
+ ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
+ fmt[i + 1] == 'x' || fmt[i + 1] == 's' ||
+ fmt[i + 1] == 'S') {
+ /* just kernel pointers */
+ if (tmp_buf)
+ cur_arg = raw_args[num_spec];
+ i++;
+ goto nocopy_fmt;
+ }
+
+ if (fmt[i + 1] == 'B') {
+ if (tmp_buf) {
+ err = snprintf(tmp_buf,
+ (tmp_buf_end - tmp_buf),
+ "%pB",
+ (void *)(long)raw_args[num_spec]);
+ tmp_buf += (err + 1);
+ }
+
+ i++;
+ num_spec++;
+ continue;
+ }
+
+ /* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
+ if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') ||
+ (fmt[i + 2] != '4' && fmt[i + 2] != '6')) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ i += 2;
+ if (!tmp_buf)
+ goto nocopy_fmt;
+
+ sizeof_cur_ip = (fmt[i] == '4') ? 4 : 16;
+ if (tmp_buf_end - tmp_buf < sizeof_cur_ip) {
+ err = -ENOSPC;
+ goto out;
+ }
+
+ unsafe_ptr = (char *)(long)raw_args[num_spec];
+ err = copy_from_kernel_nofault(cur_ip, unsafe_ptr,
+ sizeof_cur_ip);
+ if (err < 0)
+ memset(cur_ip, 0, sizeof_cur_ip);
+
+ /* hack: bstr_printf expects IP addresses to be
+ * pre-formatted as strings, ironically, the easiest way
+ * to do that is to call snprintf.
+ */
+ ip_spec[2] = fmt[i - 1];
+ ip_spec[3] = fmt[i];
+ err = snprintf(tmp_buf, tmp_buf_end - tmp_buf,
+ ip_spec, &cur_ip);
+
+ tmp_buf += err + 1;
+ num_spec++;
+
+ continue;
+ } else if (fmt[i] == 's') {
+ fmt_ptype = fmt[i];
+fmt_str:
+ if (fmt[i + 1] != 0 &&
+ !isspace(fmt[i + 1]) &&
+ !ispunct(fmt[i + 1])) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (!tmp_buf)
+ goto nocopy_fmt;
+
+ if (tmp_buf_end == tmp_buf) {
+ err = -ENOSPC;
+ goto out;
+ }
+
+ unsafe_ptr = (char *)(long)raw_args[num_spec];
+ err = bpf_trace_copy_string(tmp_buf, unsafe_ptr,
+ fmt_ptype,
+ tmp_buf_end - tmp_buf);
+ if (err < 0) {
+ tmp_buf[0] = '\0';
+ err = 1;
+ }
+
+ tmp_buf += err;
+ num_spec++;
+
+ continue;
+ }
+
+ sizeof_cur_arg = sizeof(int);
+
+ if (fmt[i] == 'l') {
+ sizeof_cur_arg = sizeof(long);
+ i++;
+ }
+ if (fmt[i] == 'l') {
+ sizeof_cur_arg = sizeof(long long);
+ i++;
+ }
+
+ if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' &&
+ fmt[i] != 'x' && fmt[i] != 'X') {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (tmp_buf)
+ cur_arg = raw_args[num_spec];
+nocopy_fmt:
+ if (tmp_buf) {
+ tmp_buf = PTR_ALIGN(tmp_buf, sizeof(u32));
+ if (tmp_buf_end - tmp_buf < sizeof_cur_arg) {
+ err = -ENOSPC;
+ goto out;
+ }
+
+ if (sizeof_cur_arg == 8) {
+ *(u32 *)tmp_buf = *(u32 *)&cur_arg;
+ *(u32 *)(tmp_buf + 4) = *((u32 *)&cur_arg + 1);
+ } else {
+ *(u32 *)tmp_buf = (u32)(long)cur_arg;
+ }
+ tmp_buf += sizeof_cur_arg;
+ }
+ num_spec++;
+ }
+
+ err = 0;
+out:
+ if (err)
+ bpf_bprintf_cleanup();
+ return err;
+}
+
+#define MAX_SNPRINTF_VARARGS 12
+
+BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
+ const void *, data, u32, data_len)
+{
+ int err, num_args;
+ u32 *bin_args;
+
+ if (data_len % 8 || data_len > MAX_SNPRINTF_VARARGS * 8 ||
+ (data_len && !data))
+ return -EINVAL;
+ num_args = data_len / 8;
+
+ /* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we
+ * can safely give an unbounded size.
+ */
+ err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args);
+ if (err < 0)
+ return err;
+
+ err = bstr_printf(str, str_size, fmt, bin_args);
+
+ bpf_bprintf_cleanup();
+
+ return err + 1;
+}
+
+const struct bpf_func_proto bpf_snprintf_proto = {
+ .func = bpf_snprintf,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_MEM_OR_NULL,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_PTR_TO_CONST_STR,
+ .arg4_type = ARG_PTR_TO_MEM_OR_NULL,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+};
+
const struct bpf_func_proto bpf_get_current_task_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
@@ -758,6 +1075,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
NULL : &bpf_probe_read_kernel_str_proto;
case BPF_FUNC_snprintf_btf:
return &bpf_snprintf_btf_proto;
+ case BPF_FUNC_snprintf:
+ return &bpf_snprintf_proto;
default:
return NULL;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 435d16d..1fd9865 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4452,6 +4452,7 @@ static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_T
static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
+static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
@@ -4480,6 +4481,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_INT] = &int_ptr_types,
[ARG_PTR_TO_LONG] = &int_ptr_types,
[ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
+ [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types,
};
static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
@@ -4727,6 +4729,45 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
if (err)
return err;
err = check_ptr_alignment(env, reg, 0, size, true);
+ } else if (arg_type == ARG_PTR_TO_CONST_STR) {
+ struct bpf_map *map = reg->map_ptr;
+ int map_off;
+ u64 map_addr;
+ char *str_ptr;
+
+ if (reg->type != PTR_TO_MAP_VALUE || !map ||
+ !bpf_map_is_rdonly(map)) {
+ verbose(env, "R%d does not point to a readonly map'\n", regno);
+ return -EACCES;
+ }
+
+ if (!tnum_is_const(reg->var_off)) {
+ verbose(env, "R%d is not a constant address'\n", regno);
+ return -EACCES;
+ }
+
+ if (!map->ops->map_direct_value_addr) {
+ verbose(env, "no direct value access support for this map type\n");
+ return -EACCES;
+ }
+
+ err = check_map_access(env, regno, reg->off,
+ map->value_size - reg->off, false);
+ if (err)
+ return err;
+
+ map_off = reg->off + reg->var_off.value;
+ err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
+ if (err) {
+ verbose(env, "direct value access on string failed\n");
+ return err;
+ }
+
+ str_ptr = (char *)(long)(map_addr);
+ if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
+ verbose(env, "string is not zero-terminated\n");
+ return -EINVAL;
+ }
}
return err;
@@ -5417,6 +5458,41 @@ static int check_reference_leak(struct bpf_verifier_env *env)
return state->acquired_refs ? -EINVAL : 0;
}
+static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
+ struct bpf_reg_state *regs)
+{
+ struct bpf_reg_state *fmt_reg = ®s[BPF_REG_3];
+ struct bpf_reg_state *data_len_reg = ®s[BPF_REG_5];
+ struct bpf_map *fmt_map = fmt_reg->map_ptr;
+ int err, fmt_map_off, num_args;
+ u64 fmt_addr;
+ char *fmt;
+
+ /* data must be an array of u64 */
+ if (data_len_reg->var_off.value % 8)
+ return -EINVAL;
+ num_args = data_len_reg->var_off.value / 8;
+
+ /* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
+ * and map_direct_value_addr is set.
+ */
+ fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
+ err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
+ fmt_map_off);
+ if (err)
+ return err;
+ fmt = (char *)(long)fmt_addr + fmt_map_off;
+
+ /* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
+ * can focus on validating the format specifiers.
+ */
+ err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args);
+ if (err < 0)
+ verbose(env, "Invalid format string\n");
+
+ return err;
+}
+
static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
{
const struct bpf_func_proto *fn = NULL;
@@ -5521,6 +5597,12 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
return -EINVAL;
}
+ if (func_id == BPF_FUNC_snprintf) {
+ err = check_bpf_snprintf_call(env, regs);
+ if (err < 0)
+ return err;
+ }
+
/* reset caller saved regs */
for (i = 0; i < CALLER_SAVED_REGS; i++) {
mark_reg_not_init(env, regs, caller_saved[i]);
diff --git a/kernel/fork.c b/kernel/fork.c
index 457f7d2..2c854c1 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -96,6 +96,7 @@
#include <linux/kasan.h>
#include <linux/scs.h>
#include <linux/io_uring.h>
+#include <linux/bpf.h>
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
@@ -733,6 +734,7 @@ void __put_task_struct(struct task_struct *tsk)
cgroup_free(tsk);
task_numa_free(tsk, true);
security_task_free(tsk);
+ bpf_task_storage_free(tsk);
exit_creds(tsk);
delayacct_tsk_free(tsk);
put_signal_struct(tsk->signal);
@@ -2069,6 +2071,9 @@ static __latent_entropy struct task_struct *copy_process(
p->sequential_io = 0;
p->sequential_io_avg = 0;
#endif
+#ifdef CONFIG_BPF_SYSCALL
+ RCU_INIT_POINTER(p->bpf_storage, NULL);
+#endif
/* Perform scheduler related setup. Assign this task to a CPU. */
retval = sched_fork(clone_flags, p);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b6697d5..3d2488c 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -359,188 +359,34 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
return &bpf_probe_write_user_proto;
}
-static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
- size_t bufsz)
-{
- void __user *user_ptr = (__force void __user *)unsafe_ptr;
-
- buf[0] = 0;
-
- switch (fmt_ptype) {
- case 's':
-#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
- if ((unsigned long)unsafe_ptr < TASK_SIZE) {
- strncpy_from_user_nofault(buf, user_ptr, bufsz);
- break;
- }
- fallthrough;
-#endif
- case 'k':
- strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
- break;
- case 'u':
- strncpy_from_user_nofault(buf, user_ptr, bufsz);
- break;
- }
-}
-
static DEFINE_RAW_SPINLOCK(trace_printk_lock);
-#define BPF_TRACE_PRINTK_SIZE 1024
+#define MAX_TRACE_PRINTK_VARARGS 3
+#define BPF_TRACE_PRINTK_SIZE 1024
-static __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...)
-{
- static char buf[BPF_TRACE_PRINTK_SIZE];
- unsigned long flags;
- va_list ap;
- int ret;
-
- raw_spin_lock_irqsave(&trace_printk_lock, flags);
- va_start(ap, fmt);
- ret = vsnprintf(buf, sizeof(buf), fmt, ap);
- va_end(ap);
- /* vsnprintf() will not append null for zero-length strings */
- if (ret == 0)
- buf[0] = '\0';
- trace_bpf_trace_printk(buf);
- raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
-
- return ret;
-}
-
-/*
- * Only limited trace_printk() conversion specifiers allowed:
- * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pB %pks %pus %s
- */
BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
u64, arg2, u64, arg3)
{
- int i, mod[3] = {}, fmt_cnt = 0;
- char buf[64], fmt_ptype;
- void *unsafe_ptr = NULL;
- bool str_seen = false;
+ u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
+ u32 *bin_args;
+ static char buf[BPF_TRACE_PRINTK_SIZE];
+ unsigned long flags;
+ int ret;
- /*
- * bpf_check()->check_func_arg()->check_stack_boundary()
- * guarantees that fmt points to bpf program stack,
- * fmt_size bytes of it were initialized and fmt_size > 0
- */
- if (fmt[--fmt_size] != 0)
- return -EINVAL;
+ ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args,
+ MAX_TRACE_PRINTK_VARARGS);
+ if (ret < 0)
+ return ret;
- /* check format string for allowed specifiers */
- for (i = 0; i < fmt_size; i++) {
- if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
- return -EINVAL;
+ raw_spin_lock_irqsave(&trace_printk_lock, flags);
+ ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
- if (fmt[i] != '%')
- continue;
+ trace_bpf_trace_printk(buf);
+ raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
- if (fmt_cnt >= 3)
- return -EINVAL;
+ bpf_bprintf_cleanup();
- /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
- i++;
- if (fmt[i] == 'l') {
- mod[fmt_cnt]++;
- i++;
- } else if (fmt[i] == 'p') {
- mod[fmt_cnt]++;
- if ((fmt[i + 1] == 'k' ||
- fmt[i + 1] == 'u') &&
- fmt[i + 2] == 's') {
- fmt_ptype = fmt[i + 1];
- i += 2;
- goto fmt_str;
- }
-
- if (fmt[i + 1] == 'B') {
- i++;
- goto fmt_next;
- }
-
- /* disallow any further format extensions */
- if (fmt[i + 1] != 0 &&
- !isspace(fmt[i + 1]) &&
- !ispunct(fmt[i + 1]))
- return -EINVAL;
-
- goto fmt_next;
- } else if (fmt[i] == 's') {
- mod[fmt_cnt]++;
- fmt_ptype = fmt[i];
-fmt_str:
- if (str_seen)
- /* allow only one '%s' per fmt string */
- return -EINVAL;
- str_seen = true;
-
- if (fmt[i + 1] != 0 &&
- !isspace(fmt[i + 1]) &&
- !ispunct(fmt[i + 1]))
- return -EINVAL;
-
- switch (fmt_cnt) {
- case 0:
- unsafe_ptr = (void *)(long)arg1;
- arg1 = (long)buf;
- break;
- case 1:
- unsafe_ptr = (void *)(long)arg2;
- arg2 = (long)buf;
- break;
- case 2:
- unsafe_ptr = (void *)(long)arg3;
- arg3 = (long)buf;
- break;
- }
-
- bpf_trace_copy_string(buf, unsafe_ptr, fmt_ptype,
- sizeof(buf));
- goto fmt_next;
- }
-
- if (fmt[i] == 'l') {
- mod[fmt_cnt]++;
- i++;
- }
-
- if (fmt[i] != 'i' && fmt[i] != 'd' &&
- fmt[i] != 'u' && fmt[i] != 'x')
- return -EINVAL;
-fmt_next:
- fmt_cnt++;
- }
-
-/* Horrid workaround for getting va_list handling working with different
- * argument type combinations generically for 32 and 64 bit archs.
- */
-#define __BPF_TP_EMIT() __BPF_ARG3_TP()
-#define __BPF_TP(...) \
- bpf_do_trace_printk(fmt, ##__VA_ARGS__)
-
-#define __BPF_ARG1_TP(...) \
- ((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64)) \
- ? __BPF_TP(arg1, ##__VA_ARGS__) \
- : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32)) \
- ? __BPF_TP((long)arg1, ##__VA_ARGS__) \
- : __BPF_TP((u32)arg1, ##__VA_ARGS__)))
-
-#define __BPF_ARG2_TP(...) \
- ((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64)) \
- ? __BPF_ARG1_TP(arg2, ##__VA_ARGS__) \
- : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32)) \
- ? __BPF_ARG1_TP((long)arg2, ##__VA_ARGS__) \
- : __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__)))
-
-#define __BPF_ARG3_TP(...) \
- ((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64)) \
- ? __BPF_ARG2_TP(arg3, ##__VA_ARGS__) \
- : ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32)) \
- ? __BPF_ARG2_TP((long)arg3, ##__VA_ARGS__) \
- : __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__)))
-
- return __BPF_TP_EMIT();
+ return ret;
}
static const struct bpf_func_proto bpf_trace_printk_proto = {
@@ -568,184 +414,27 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
}
#define MAX_SEQ_PRINTF_VARARGS 12
-#define MAX_SEQ_PRINTF_MAX_MEMCPY 6
-#define MAX_SEQ_PRINTF_STR_LEN 128
-
-struct bpf_seq_printf_buf {
- char buf[MAX_SEQ_PRINTF_MAX_MEMCPY][MAX_SEQ_PRINTF_STR_LEN];
-};
-static DEFINE_PER_CPU(struct bpf_seq_printf_buf, bpf_seq_printf_buf);
-static DEFINE_PER_CPU(int, bpf_seq_printf_buf_used);
BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
const void *, data, u32, data_len)
{
- int err = -EINVAL, fmt_cnt = 0, memcpy_cnt = 0;
- int i, buf_used, copy_size, num_args;
- u64 params[MAX_SEQ_PRINTF_VARARGS];
- struct bpf_seq_printf_buf *bufs;
- const u64 *args = data;
+ int err, num_args;
+ u32 *bin_args;
- buf_used = this_cpu_inc_return(bpf_seq_printf_buf_used);
- if (WARN_ON_ONCE(buf_used > 1)) {
- err = -EBUSY;
- goto out;
- }
-
- bufs = this_cpu_ptr(&bpf_seq_printf_buf);
-
- /*
- * bpf_check()->check_func_arg()->check_stack_boundary()
- * guarantees that fmt points to bpf program stack,
- * fmt_size bytes of it were initialized and fmt_size > 0
- */
- if (fmt[--fmt_size] != 0)
- goto out;
-
- if (data_len & 7)
- goto out;
-
- for (i = 0; i < fmt_size; i++) {
- if (fmt[i] == '%') {
- if (fmt[i + 1] == '%')
- i++;
- else if (!data || !data_len)
- goto out;
- }
- }
-
+ if (data_len & 7 || data_len > MAX_SEQ_PRINTF_VARARGS * 8 ||
+ (data_len && !data))
+ return -EINVAL;
num_args = data_len / 8;
- /* check format string for allowed specifiers */
- for (i = 0; i < fmt_size; i++) {
- /* only printable ascii for now. */
- if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
- err = -EINVAL;
- goto out;
- }
+ err = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
+ if (err < 0)
+ return err;
- if (fmt[i] != '%')
- continue;
+ seq_bprintf(m, fmt, bin_args);
- if (fmt[i + 1] == '%') {
- i++;
- continue;
- }
+ bpf_bprintf_cleanup();
- if (fmt_cnt >= MAX_SEQ_PRINTF_VARARGS) {
- err = -E2BIG;
- goto out;
- }
-
- if (fmt_cnt >= num_args) {
- err = -EINVAL;
- goto out;
- }
-
- /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
- i++;
-
- /* skip optional "[0 +-][num]" width formating field */
- while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' ||
- fmt[i] == ' ')
- i++;
- if (fmt[i] >= '1' && fmt[i] <= '9') {
- i++;
- while (fmt[i] >= '0' && fmt[i] <= '9')
- i++;
- }
-
- if (fmt[i] == 's') {
- void *unsafe_ptr;
-
- /* try our best to copy */
- if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) {
- err = -E2BIG;
- goto out;
- }
-
- unsafe_ptr = (void *)(long)args[fmt_cnt];
- err = strncpy_from_kernel_nofault(bufs->buf[memcpy_cnt],
- unsafe_ptr, MAX_SEQ_PRINTF_STR_LEN);
- if (err < 0)
- bufs->buf[memcpy_cnt][0] = '\0';
- params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];
-
- fmt_cnt++;
- memcpy_cnt++;
- continue;
- }
-
- if (fmt[i] == 'p') {
- if (fmt[i + 1] == 0 ||
- fmt[i + 1] == 'K' ||
- fmt[i + 1] == 'x' ||
- fmt[i + 1] == 'B') {
- /* just kernel pointers */
- params[fmt_cnt] = args[fmt_cnt];
- fmt_cnt++;
- continue;
- }
-
- /* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
- if (fmt[i + 1] != 'i' && fmt[i + 1] != 'I') {
- err = -EINVAL;
- goto out;
- }
- if (fmt[i + 2] != '4' && fmt[i + 2] != '6') {
- err = -EINVAL;
- goto out;
- }
-
- if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) {
- err = -E2BIG;
- goto out;
- }
-
-
- copy_size = (fmt[i + 2] == '4') ? 4 : 16;
-
- err = copy_from_kernel_nofault(bufs->buf[memcpy_cnt],
- (void *) (long) args[fmt_cnt],
- copy_size);
- if (err < 0)
- memset(bufs->buf[memcpy_cnt], 0, copy_size);
- params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];
-
- i += 2;
- fmt_cnt++;
- memcpy_cnt++;
- continue;
- }
-
- if (fmt[i] == 'l') {
- i++;
- if (fmt[i] == 'l')
- i++;
- }
-
- if (fmt[i] != 'i' && fmt[i] != 'd' &&
- fmt[i] != 'u' && fmt[i] != 'x' &&
- fmt[i] != 'X') {
- err = -EINVAL;
- goto out;
- }
-
- params[fmt_cnt] = args[fmt_cnt];
- fmt_cnt++;
- }
-
- /* Maximumly we can have MAX_SEQ_PRINTF_VARARGS parameter, just give
- * all of them to seq_printf().
- */
- seq_printf(m, fmt, params[0], params[1], params[2], params[3],
- params[4], params[5], params[6], params[7], params[8],
- params[9], params[10], params[11]);
-
- err = seq_has_overflowed(m) ? -EOVERFLOW : 0;
-out:
- this_cpu_dec(bpf_seq_printf_buf_used);
- return err;
+ return seq_has_overflowed(m) ? -EOVERFLOW : 0;
}
BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file)
@@ -1355,6 +1044,12 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_per_cpu_ptr_proto;
case BPF_FUNC_this_cpu_ptr:
return &bpf_this_cpu_ptr_proto;
+ case BPF_FUNC_task_storage_get:
+ return &bpf_task_storage_get_proto;
+ case BPF_FUNC_task_storage_delete:
+ return &bpf_task_storage_delete_proto;
+ case BPF_FUNC_snprintf:
+ return &bpf_snprintf_proto;
default:
return NULL;
}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 9880f34..7421afb 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3831,6 +3831,33 @@ union bpf_attr {
* The **hash_algo** is returned on success,
* **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if
* invalid arguments are passed.
+ *
+ * long bpf_snprintf(char *str, u32 str_size, const char *fmt, u64 *data, u32 data_len)
+ * Description
+ * Outputs a string into the **str** buffer of size **str_size**
+ * based on a format string stored in a read-only map pointed by
+ * **fmt**.
+ *
+ * Each format specifier in **fmt** corresponds to one u64 element
+ * in the **data** array. For strings and pointers where pointees
+ * are accessed, only the pointer values are stored in the *data*
+ * array. The *data_len* is the size of *data* in bytes.
+ *
+ * Formats **%s** and **%p{i,I}{4,6}** require to read kernel
+ * memory. Reading kernel memory may fail due to either invalid
+ * address or valid address but requiring a major memory fault. If
+ * reading kernel memory fails, the string for **%s** will be an
+ * empty string, and the ip address for **%p{i,I}{4,6}** will be 0.
+ * Not returning error to bpf program is consistent with what
+ * **bpf_trace_printk**\ () does for now.
+ *
+ * Return
+ * The strictly positive length of the formatted string, including
+ * the trailing zero character. If the return value is greater than
+ * **str_size**, **str** contains a truncated string, guaranteed to
+ * be zero-terminated except when **str_size** is 0.
+ *
+ * Or **-EBUSY** if the per-CPU memory copy buffer is busy.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3995,6 +4022,10 @@ union bpf_attr {
FN(bprm_opts_set), \
FN(ktime_get_coarse_ns), \
FN(ima_inode_hash), \
+ FN(sock_from_file), /* LAKITU: backport stub, unimplemented */ \
+ FN(check_mtu), /* LAKITU: backport stub, unimplemented */ \
+ FN(for_each_map_elem), /* LAKITU: backport stub, unimplemented */ \
+ FN(snprintf), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper