merge-upstream/v5.4.152 from branch/tag: upstream/v5.4.152 into branch: main-R85-13310.B-cos-5.4 Changelog: ------------------------------------------------------------- Anand K Mistry (1): perf/x86: Reset destroy callback on event init failure Changbin Du (1): tools/vm/page-types: remove dependency on opt_file for idle page tracking Dan Carpenter (1): ext2: fix sleeping in atomic bugs on error Faizel K B (1): usb: testusb: Fix for showing the connection speed Fares Mehanna (1): kvm: x86: Add AMD PMU MSRs to msrs_to_save_all[] Greg Kroah-Hartman (1): Linux 5.4.152 Jan Beulich (1): xen-netback: correct success/error reporting for the SKB-with-fraglist case Kate Hsuan (1): libata: Add ATA_HORKAGE_NO_NCQ_ON_ATI for Samsung 860 and 870 SSD. Li Zhijian (1): selftests: be sure to make khdr before other targets Linus Torvalds (1): sparc64: fix pci_iounmap() when CONFIG_PCI is not set Ming Lei (1): scsi: sd: Free scsi_disk device via put_device() Rik van Riel (1): silence nfscache allocation warnings with kvzalloc Sergey Senozhatsky (1): KVM: do not shrink halt_poll_ns below grow_start Shuah Khan (1): selftests:kvm: fix get_warnings_count() ignoring fscanf() return warn Vladimir Oltean (1): net: mdio: introduce a shutdown method to mdio device drivers Wen Xiong (1): scsi: ses: Retry failed Send/Receive Diagnostic commands Yang Yingliang (1): usb: dwc2: check return value after calling platform_get_resource() BUG=b/202635922 TEST=tryjob, validation and K8s e2e RELEASE_NOTE=Updated the Linux kernel to v5.4.152. Signed-off-by: COS Kernel Merge Bot <cloud-image-merge-automation@prod.google.com> Change-Id: I79103a14c26ff5659159b8f87c413c7f9bc14370

commit: 66127a9fe0c93b4c3cc69d840d1cd7213e77a0af [log] [tgz]
author: COS Kernel Merge Bot <cloud-image-merge-automation@prod.google.com> Sun Oct 10 01:59:13 2021 -0700
committer: COS Kernel Merge Bot <cloud-image-merge-automation@prod.google.com> Sun Oct 10 01:59:14 2021 -0700
tree: 1657230b62b87ede5ea1cd22daa4c1c009c1971d
parent: acece984078c1e1a59a5d965a39f89f2ab29b14f [diff]
parent: faaca480fd5cd1976b6db743c43ac1f8d583de72 [diff]
diff --git a/Makefile b/Makefile
index ffcdc36..dc0f5e3 100644
--- a/Makefile
+++ b/Makefile

@@ -789,11 +789,20 @@
 endif
 endif
 
-# Initialize all stack variables with a pattern, if desired.
-ifdef CONFIG_INIT_STACK_ALL
+# Initialize all stack variables with a 0xAA pattern.
+ifdef CONFIG_INIT_STACK_ALL_PATTERN
 KBUILD_CFLAGS	+= -ftrivial-auto-var-init=pattern
 endif
 
+# Initialize all stack variables with a zero value.
+ifdef CONFIG_INIT_STACK_ALL_ZERO
+# Future support for zero initialization is still being debated, see
+# https://bugs.llvm.org/show_bug.cgi?id=45497. These flags are subject to being
+# renamed or dropped.
+KBUILD_CFLAGS	+= -ftrivial-auto-var-init=zero
+KBUILD_CFLAGS	+= -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang
+endif
+
 DEBUG_CFLAGS	:= $(call cc-option, -fno-var-tracking-assignments)
 
 ifdef CONFIG_DEBUG_INFO

diff --git a/PRESUBMIT.cfg b/PRESUBMIT.cfg
new file mode 100644
index 0000000..2fcead1
--- /dev/null
+++ b/PRESUBMIT.cfg

@@ -0,0 +1,14 @@
+[Hook Overrides]
+aosp_license_check: false
+cros_license_check: false
+long_line_check: false
+stray_whitespace_check: false
+tab_check: false
+tabbed_indent_required_check: false
+signoff_check: true
+
+# Make sure RELEASE_NOTE field is present.
+release_note_field_check: true
+
+# Make sure cos_patch trailer is present.
+cos_patch_trailer_check: true

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 6f013e4..04d20dc 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c

@@ -96,7 +96,7 @@
 {
 	return 0;
 }
-static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { }
+static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp) { }
 static inline u32 perf_get_misc_flags(struct pt_regs *regs)
 {
 	return 0;
@@ -127,7 +127,7 @@
 static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
 static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
 static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {}
-static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
+static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {}
 static void pmao_restore_workaround(bool ebb) { }
 #endif /* CONFIG_PPC32 */
 
@@ -182,7 +182,7 @@
  * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC, the
  * [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA, or the SDAR_VALID bit in SIER.
  */
-static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
+static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp)
 {
 	unsigned long mmcra = regs->dsisr;
 	bool sdar_valid;
@@ -207,8 +207,7 @@
 	if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
 		*addrp = mfspr(SPRN_SDAR);
 
-	if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
-		is_kernel_addr(mfspr(SPRN_SDAR)))
+	if (is_kernel_addr(mfspr(SPRN_SDAR)) && perf_allow_kernel(&event->attr) != 0)
 		*addrp = 0;
 }
 
@@ -447,7 +446,7 @@
 }
 
 /* Processing BHRB entries */
-static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
+static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw)
 {
 	u64 val;
 	u64 addr;
@@ -475,8 +474,7 @@
 			 * exporting it to userspace (avoid exposure of regions
 			 * where we could have speculative execution)
 			 */
-			if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
-				is_kernel_addr(addr))
+			if (is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0)
 				continue;
 
 			/* Branches are read most recent first (ie. mfbhrb 0 is
@@ -2120,12 +2118,12 @@
 
 		if (event->attr.sample_type &
 		    (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
-			perf_get_data_addr(regs, &data.addr);
+			perf_get_data_addr(event, regs, &data.addr);
 
 		if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
 			struct cpu_hw_events *cpuhw;
 			cpuhw = this_cpu_ptr(&cpu_hw_events);
-			power_pmu_bhrb_read(cpuhw);
+			power_pmu_bhrb_read(event, cpuhw);
 			data.br_stack = &cpuhw->bhrb_stack;
 		}
 

diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 741540d..6a3b599 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c

@@ -555,9 +555,11 @@
 	 * Note that the default paranoia setting permits unprivileged
 	 * users to profile the kernel.
 	 */
-	if (event->attr.exclude_kernel && perf_paranoid_kernel() &&
-	    !capable(CAP_SYS_ADMIN))
-		return -EACCES;
+	if (event->attr.exclude_kernel) {
+		ret = perf_allow_kernel(&event->attr);
+		if (ret)
+			return ret;
+	}
 
 	if (x86_add_exclusive(x86_lbr_exclusive_bts))
 		return -EBUSY;

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 70758f9..8f9a8bc 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c

@@ -3317,8 +3317,9 @@
 	if (x86_pmu.version < 3)
 		return -EINVAL;
 
-	if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
-		return -EACCES;
+	ret = perf_allow_cpu(&event->attr);
+	if (ret)
+		return ret;
 
 	event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
 

diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c
index dee579e..a4cc660 100644
--- a/arch/x86/events/intel/p4.c
+++ b/arch/x86/events/intel/p4.c

@@ -776,8 +776,9 @@
 	 * the user needs special permissions to be able to use it
 	 */
 	if (p4_ht_active() && p4_event_bind_map[v].shared) {
-		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
-			return -EACCES;
+		v = perf_allow_cpu(&event->attr);
+		if (v)
+			return v;
 	}
 
 	/* ESCR EventMask bits may be invalid */

diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 28b92e3..796d660 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig

@@ -59,6 +59,15 @@
 	  rescue mode with init=/bin/sh, even when the /dev directory
 	  on the rootfs is completely empty.
 
+config DEVTMPFS_SAFE
+	bool "Automount devtmpfs with nosuid/noexec"
+	depends on DEVTMPFS_MOUNT
+	default y
+	help
+	  This instructs the kernel to automount devtmpfs with the
+	  MS_NOEXEC and MS_NOSUID mount flags, which can prevent
+	  certain kinds of code-execution attack on embedded platforms.
+
 config STANDALONE
 	bool "Select only drivers that don't need compile-time external firmware"
 	default y

diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 30d0523..354877e 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c

@@ -362,6 +362,7 @@
 int devtmpfs_mount(const char *mntdir)
 {
 	int err;
+	int mflags = MS_SILENT;
 
 	if (!mount_dev)
 		return 0;
@@ -369,7 +370,10 @@
 	if (!thread)
 		return 0;
 
-	err = ksys_mount("devtmpfs", mntdir, "devtmpfs", MS_SILENT, NULL);
+#ifdef CONFIG_DEVTMPFS_SAFE
+	mflags |= MS_NOEXEC | MS_NOSUID;
+#endif
+	err = ksys_mount("devtmpfs", mntdir, "devtmpfs", mflags, NULL);
 	if (err)
 		printk(KERN_INFO "devtmpfs: error mounting %i\n", err);
 	else

diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c
index b869316..7ef3cec 100644
--- a/drivers/md/dm-init.c
+++ b/drivers/md/dm-init.c

@@ -301,3 +301,254 @@
 
 module_param(create, charp, 0);
 MODULE_PARM_DESC(create, "Create a mapped device in early boot");
+
+/* ---------------------------------------------------------------
+ * ChromeOS shim - convert dm= format to dm-mod.create= format
+ * ---------------------------------------------------------------
+ */
+
+struct dm_chrome_target {
+	char *field[4];	/* <start> <length> <type> <options>, in that order */
+};
+
+struct dm_chrome_dev {
+	char *name, *uuid, *mode;	/* <head> fields, set by dm_chrome_parse_dev() */
+	unsigned int num_targets;	/* <num> of the head; 1 when it is omitted */
+	struct dm_chrome_target targets[DM_MAX_TARGETS];	/* first num_targets entries are used */
+};
+
+/*
+ * Carve one "<start> <length> <type> <options>" target out of @str and
+ * record a pointer to each field in @tgt.
+ *
+ * Returns NULL as soon as one of the space-separated fields is missing.
+ * Otherwise returns whatever str_field_delimit() yields for the trailing
+ * ',' of the options field, which may itself be NULL for the last target.
+ */
+static char __init *dm_chrome_parse_target(char *str, struct dm_chrome_target *tgt)
+{
+	const unsigned int last = ARRAY_SIZE(tgt->field) - 1;
+	unsigned int n = 0;
+
+	tgt->field[0] = str;
+	/* Every field but the last one is terminated by a space */
+	while (n < last) {
+		char *rest = str_field_delimit(&tgt->field[n], ' ');
+
+		tgt->field[n + 1] = rest;
+		if (!rest)
+			return NULL;
+		n++;
+	}
+	/* The options field runs up to the next comma, if there is one */
+	return str_field_delimit(&tgt->field[last], ',');
+}
+
+/*
+ * Parse one "<name> <uuid> <mode> [<num>]," head followed by its targets
+ * from @str into @dev.
+ *
+ * Returns the value handed back by the last dm_chrome_parse_target() call
+ * (NULL when the end of the string was reached), or ERR_PTR(-EINVAL) when
+ * the head or any target is malformed or the target count is out of range.
+ */
+static char __init *dm_chrome_parse_dev(char *str, struct dm_chrome_dev *dev)
+{
+	char *target, *num;
+	unsigned int i;
+
+	if (!str)
+		return ERR_PTR(-EINVAL);
+
+	target = str_field_delimit(&str, ',');
+	if (!target)
+		return ERR_PTR(-EINVAL);
+
+	/* Delimit first 3 fields that are separated by space */
+	dev->name = str;
+	dev->uuid = str_field_delimit(&dev->name, ' ');
+	if (!dev->uuid)
+		return ERR_PTR(-EINVAL);
+
+	dev->mode = str_field_delimit(&dev->uuid, ' ');
+	if (!dev->mode)
+		return ERR_PTR(-EINVAL);
+
+	/* num is optional */
+	num = str_field_delimit(&dev->mode, ' ');
+	if (!num) {
+		dev->num_targets = 1;
+	} else {
+		/* Delimit num and check that it is the last field */
+		if (str_field_delimit(&num, ' '))
+			return ERR_PTR(-EINVAL);
+		if (kstrtouint(num, 0, &dev->num_targets))
+			return ERR_PTR(-EINVAL);
+	}
+
+	/*
+	 * A count of zero must be refused: the loop bound below is
+	 * num_targets - 1, which wraps around for an unsigned 0 and would
+	 * let the loop index past the end of dev->targets[].
+	 */
+	if (!dev->num_targets) {
+		DMERR("a device needs at least one target");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (dev->num_targets > DM_MAX_TARGETS) {
+		DMERR("too many targets %u > %d",
+		      dev->num_targets, DM_MAX_TARGETS);
+		return ERR_PTR(-EINVAL);
+	}
+
+	for (i = 0; i < dev->num_targets - 1; i++) {
+		target = dm_chrome_parse_target(target, &dev->targets[i]);
+		if (!target)
+			return ERR_PTR(-EINVAL);
+	}
+	/* The last one can return NULL if it reaches the end of str */
+	return dm_chrome_parse_target(target, &dev->targets[i]);
+}
+
+/*
+ * Render @num_devs parsed devices in the dm-mod.create= syntax:
+ * "<name>,<uuid>,,<mode>,<target>[,<target>...]" joined by ';'.
+ * A uuid of "none" is emitted as an empty field.
+ *
+ * Returns a kmalloc()ed string of at most DM_MAX_STR_SIZE bytes that the
+ * caller must kfree(), ERR_PTR(-ENOMEM) if the buffer cannot be allocated,
+ * or ERR_PTR(-E2BIG) if the converted text does not fit.
+ */
+static char __init *dm_chrome_convert(struct dm_chrome_dev *devs, unsigned int num_devs)
+{
+	char *str = kmalloc(DM_MAX_STR_SIZE, GFP_KERNEL);
+	char *p = str;
+	unsigned int i, j;
+	size_t left;
+	int ret;
+
+	if (!str)
+		return ERR_PTR(-ENOMEM);
+
+	/* Stay a valid (empty) string even when nothing gets appended */
+	*str = '\0';
+
+	for (i = 0; i < num_devs; i++) {
+		if (!strcmp(devs[i].uuid, "none"))
+			devs[i].uuid = "";
+		left = DM_MAX_STR_SIZE - (p - str);
+		ret = snprintf(p, left,
+			       "%s,%s,,%s",
+			       devs[i].name,
+			       devs[i].uuid,
+			       devs[i].mode);
+		if (ret < 0)
+			goto out;
+		/*
+		 * snprintf() reports the length it wanted to write; advancing
+		 * by that after a truncation would move p past the buffer.
+		 */
+		if ((size_t)ret >= left)
+			goto too_long;
+		p += ret;
+
+		for (j = 0; j < devs[i].num_targets; j++) {
+			left = DM_MAX_STR_SIZE - (p - str);
+			ret = snprintf(p, left,
+				       ",%s %s %s %s",
+				       devs[i].targets[j].field[0],
+				       devs[i].targets[j].field[1],
+				       devs[i].targets[j].field[2],
+				       devs[i].targets[j].field[3]);
+			if (ret < 0)
+				goto out;
+			if ((size_t)ret >= left)
+				goto too_long;
+			p += ret;
+		}
+		if (i < num_devs - 1) {
+			left = DM_MAX_STR_SIZE - (p - str);
+			ret = snprintf(p, left, ";");
+			if (ret < 0)
+				goto out;
+			if ((size_t)ret >= left)
+				goto too_long;
+			p += ret;
+		}
+	}
+
+	return str;
+
+too_long:
+	DMERR("converted argument does not fit in %d bytes", DM_MAX_STR_SIZE);
+	ret = -E2BIG;
+out:
+	kfree(str);
+	return ERR_PTR(ret);
+}
+
+/**
+ * dm_chrome_shim - convert old dm= format used in chromeos to the new
+ * upstream format.
+ *
+ * ChromeOS old format
+ * -------------------
+ * <device>        ::= [<num>] <device-mapper>+
+ * <device-mapper> ::= <head> "," <target>+
+ * <head>          ::= <name> <uuid> <mode> [<num>]
+ * <target>        ::= <start> <length> <type> <options> ","
+ * <mode>          ::= "ro" | "rw"
+ * <uuid>          ::= xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx | "none"
+ * <type>          ::= "verity" | "bootcache" | ...
+ *
+ * Example:
+ * 2 vboot none ro 1,
+ *     0 1768000 bootcache
+ *       device=aa55b119-2a47-8c45-946a-5ac57765011f+1
+ *       signature=76e9be054b15884a9fa85973e9cb274c93afadb6
+ *       cache_start=1768000 max_blocks=100000 size_limit=23 max_trace=20000,
+ *   vroot none ro 1,
+ *     0 1740800 verity payload=254:0 hashtree=254:0 hashstart=1740800 alg=sha1
+ *       root_hexdigest=76e9be054b15884a9fa85973e9cb274c93afadb6
+ *       salt=5b3549d54d6c7a3837b9b81ed72e49463a64c03680c47835bef94d768e5646fe
+ *
+ * Notes:
+ *  1. uuid is a label for the device and we set it to "none".
+ *  2. The <num> field will be optional initially and assumed to be 1.
+ *     Once all the scripts that set these fields have been set, it will
+ *     be made mandatory.
+ */
+
+static char *chrome_create;
+
+/*
+ * Handler registered for the "dm=" boot argument: remembers the raw
+ * argument in chrome_create for dm_chrome_parse_devices().  Refuses with
+ * -EINVAL when there is no argument or when 'create' is already set.
+ *
+ * NOTE(review): __setup() handlers conventionally return 1 once the option
+ * has been consumed; confirm that returning 0 on success is intended.
+ */
+static int __init dm_chrome_shim(char *arg)
+{
+	if (!arg)
+		return -EINVAL;
+	if (create)
+		return -EINVAL;
+
+	chrome_create = arg;
+	return 0;
+}
+
+/*
+ * Late initcall: if a dm= argument was recorded (and 'create' was not set),
+ * parse it, convert it to the dm-mod.create= syntax, point 'create' at the
+ * result and run dm_init_init() on it.
+ *
+ * Returns 0 once dm_init_init() has been called, -EINVAL when there is
+ * nothing to do or the argument is malformed, -ENOMEM on allocation
+ * failure, or the error reported by dm_chrome_convert().
+ */
+static int __init dm_chrome_parse_devices(void)
+{
+	struct dm_chrome_dev *devs;
+	unsigned int num_devs, i;
+	char *next, *base_str, *first;
+	int ret = 0;
+
+	/* Verify if dm-mod.create was not used */
+	if (!chrome_create || create)
+		return -EINVAL;
+
+	if (strlen(chrome_create) >= DM_MAX_STR_SIZE) {
+		DMERR("Argument is too big. Limit is %d\n", DM_MAX_STR_SIZE);
+		return -EINVAL;
+	}
+
+	base_str = kstrdup(chrome_create, GFP_KERNEL);
+	if (!base_str)
+		return -ENOMEM;
+
+	/*
+	 * str_field_delimit() receives the string pointer by reference, so
+	 * hand it a scratch copy: base_str has to keep addressing the start
+	 * of the allocation for the restore and the kfree() below.
+	 */
+	first = base_str;
+	next = str_field_delimit(&first, ' ');
+	if (!next) {
+		ret = -EINVAL;
+		goto out_str;
+	}
+
+	/* if first field is not the optional <num> field */
+	if (kstrtouint(first, 0, &num_devs)) {
+		num_devs = 1;
+		/* rewind next pointer */
+		next = first;
+	}
+
+	/* Zero devices would leave nothing to convert */
+	if (!num_devs) {
+		DMERR("no devices requested");
+		ret = -EINVAL;
+		goto out_str;
+	}
+
+	if (num_devs > DM_MAX_DEVICES) {
+		DMERR("too many devices %u > %d", num_devs, DM_MAX_DEVICES);
+		ret = -EINVAL;
+		goto out_str;
+	}
+
+	devs = kcalloc(num_devs, sizeof(*devs), GFP_KERNEL);
+	if (!devs) {
+		/* base_str is still owned here and must not leak */
+		ret = -ENOMEM;
+		goto out_str;
+	}
+
+	/* restore string */
+	strcpy(base_str, chrome_create);
+
+	/* parse devices */
+	for (i = 0; i < num_devs; i++) {
+		next = dm_chrome_parse_dev(next, &devs[i]);
+		if (IS_ERR(next)) {
+			DMERR("couldn't parse device");
+			ret = PTR_ERR(next);
+			goto out_devs;
+		}
+	}
+
+	create = dm_chrome_convert(devs, num_devs);
+	if (IS_ERR(create)) {
+		ret = PTR_ERR(create);
+		goto out_devs;
+	}
+
+	DMDEBUG("Converting:\n\tdm=\"%s\"\n\tdm-mod.create=\"%s\"\n",
+		chrome_create, create);
+
+	/* Call upstream code */
+	dm_init_init();
+
+	kfree(create);
+
+out_devs:
+	/* Never leave 'create' pointing at freed memory or an ERR_PTR */
+	create = NULL;
+	kfree(devs);
+out_str:
+	kfree(base_str);
+
+	return ret;
+}
+
+late_initcall(dm_chrome_parse_devices);
+
+__setup("dm=", dm_chrome_shim);

diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 711f101..4f1c21f 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c

@@ -16,8 +16,10 @@
 #include "dm-verity.h"
 #include "dm-verity-fec.h"
 #include "dm-verity-verify-sig.h"
+#include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/reboot.h>
+#include <crypto/hash.h>
 
 #define DM_MSG_PREFIX			"verity"
 
@@ -32,8 +34,9 @@
 #define DM_VERITY_OPT_RESTART		"restart_on_corruption"
 #define DM_VERITY_OPT_IGN_ZEROES	"ignore_zero_blocks"
 #define DM_VERITY_OPT_AT_MOST_ONCE	"check_at_most_once"
+#define DM_VERITY_OPT_ERROR_BEHAVIOR	"error_behavior"
 
-#define DM_VERITY_OPTS_MAX		(3 + DM_VERITY_OPTS_FEC + \
+#define DM_VERITY_OPTS_MAX		(4 + DM_VERITY_OPTS_FEC + \
 					 DM_VERITY_ROOT_HASH_VERIFICATION_OPTS)
 
 static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
@@ -47,6 +50,120 @@
 	unsigned n_blocks;
 };
 
+/* Provide a lightweight means of specifying the global default for
+ * error behavior: eio, panic, none, or notify.
+ * Legacy support for 0 = eio, 1 = panic, 2 = none, 3 = notify.
+ * This is matched to the enum in dm-verity.h.
+ */
+static char *error_behavior_istring[] = { "0", "1", "2", "3" };
+static const char *allowed_error_behaviors[] = { "eio", "panic", "none",
+						 "notify", NULL };
+static char *error_behavior = "eio";
+module_param(error_behavior, charp, 0644);
+MODULE_PARM_DESC(error_behavior, "Behavior on error "
+				 "(eio, panic, none, notify)");
+
+/* Controls whether verity_get_device will wait forever for a device. */
+static int dev_wait;
+module_param(dev_wait, int, 0444);
+MODULE_PARM_DESC(dev_wait, "Wait forever for a backing device");
+
+static BLOCKING_NOTIFIER_HEAD(verity_error_notifier);
+
+/**
+ * dm_verity_register_error_notifier - add a listener for verity errors
+ * @nb:	notifier block to add to the verity_error_notifier chain
+ *
+ * Returns the result of blocking_notifier_chain_register().
+ */
+int dm_verity_register_error_notifier(struct notifier_block *nb)
+{
+	int rc;
+
+	rc = blocking_notifier_chain_register(&verity_error_notifier, nb);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(dm_verity_register_error_notifier);
+
+/**
+ * dm_verity_unregister_error_notifier - remove a verity error listener
+ * @nb:	notifier block to remove from the verity_error_notifier chain
+ *
+ * Returns the result of blocking_notifier_chain_unregister().
+ */
+int dm_verity_unregister_error_notifier(struct notifier_block *nb)
+{
+	int rc;
+
+	rc = blocking_notifier_chain_unregister(&verity_error_notifier, nb);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(dm_verity_unregister_error_notifier);
+
+/*
+ * verity_error - react to a failed request according to v->error_behavior.
+ *
+ * Logs the failure, and for DM_VERITY_ERROR_BEHAVIOR_NOTIFY runs the
+ * verity_error_notifier chain, taking the final behavior from the state the
+ * listeners filled in only if the chain returns 0 (panic otherwise).
+ * EIO and NONE return quietly; every other behavior panics unless the
+ * failure is considered transient.  @io is currently unused.
+ *
+ * NOTE(review): blocking_notifier_call_chain() may sleep; confirm this is
+ * never reached from atomic context.
+ * TODO make this call a registered handler.
+ */
+static void verity_error(struct dm_verity *v, struct dm_verity_io *io,
+			 blk_status_t status)
+{
+	/* If the hash did not fail, then this is likely transient. */
+	const int transient = !v->hash_failed;
+	const char *message = transient ? "block" : "integrity";
+	const dev_t devt = v->data_dev->bdev->bd_dev;
+	const u64 block = ~0;	/* no block number is tracked here */
+	int behavior = v->error_behavior;
+
+	DMERR_LIMIT("verification failure occurred: %s failure", message);
+
+	if (behavior == DM_VERITY_ERROR_BEHAVIOR_NOTIFY) {
+		struct dm_verity_error_state error_state = {
+			.code = status,
+			.transient = transient,
+			.block = block,
+			.message = message,
+			.dev_start = v->data_start,
+			.dev_len = v->data_blocks,
+			.dev = v->data_dev->bdev,
+			.hash_dev_start = v->hash_start,
+			.hash_dev_len = v->hash_blocks,
+			.hash_dev = v->hash_dev->bdev,
+			/* Default unless a listener changes it. */
+			.behavior = DM_VERITY_ERROR_BEHAVIOR_PANIC,
+		};
+
+		if (blocking_notifier_call_chain(&verity_error_notifier,
+						 transient, &error_state))
+			behavior = DM_VERITY_ERROR_BEHAVIOR_PANIC;
+		else
+			behavior = error_state.behavior;
+	}
+
+	if (behavior == DM_VERITY_ERROR_BEHAVIOR_EIO ||
+	    behavior == DM_VERITY_ERROR_BEHAVIOR_NONE)
+		return;
+
+	/* Transient failures never escalate to a panic. */
+	if (transient)
+		return;
+
+	panic("dm-verity failure: "
+	      "device:%u:%u status:%d block:%llu message:%s",
+	      MAJOR(devt), MINOR(devt), status, (u64)block, message);
+}
+
+/**
+ * verity_parse_error_behavior - map a behavior name or digit to its index
+ * @behavior:	NUL-terminated char array
+ *
+ * Walks allowed_error_behaviors and accepts @behavior when it equals the
+ * entry's name or when its first character is the entry's index digit (only
+ * the first character is compared in that case).  Returns the matching
+ * error_behavior_istring entry, i.e. the enum value in string form, or
+ * ERR_PTR(-EINVAL) when nothing matches.
+ */
+static char *verity_parse_error_behavior(const char *behavior)
+{
+	int idx;
+
+	for (idx = 0; allowed_error_behaviors[idx]; idx++) {
+		const char *name = allowed_error_behaviors[idx];
+
+		if (behavior[0] == idx + '0' || !strcmp(name, behavior))
+			return error_behavior_istring[idx];
+	}
+
+	return ERR_PTR(-EINVAL);
+}
+
 /*
  * Auxiliary structure appended to each dm-bufio buffer. If the value
  * hash_verified is nonzero, hash of the block has been verified.
@@ -550,6 +667,8 @@
 	struct dm_verity *v = io->v;
 	struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
 
+	if (status && !verity_fec_is_enabled(io->v))
+		verity_error(v, io, status);
 	bio->bi_end_io = io->orig_bi_end_io;
 	bio->bi_status = status;
 
@@ -917,6 +1036,22 @@
 				return r;
 			continue;
 
+		} else if (!strcasecmp(arg_name, DM_VERITY_OPT_ERROR_BEHAVIOR)) {
+			int behavior;
+
+			if (!argc) {
+				ti->error = "Missing error behavior parameter";
+				return -EINVAL;
+			}
+			if (kstrtoint(dm_shift_arg(as), 0, &behavior) ||
+			    behavior < 0) {
+				ti->error = "Bad error behavior parameter";
+				return -EINVAL;
+			}
+			v->error_behavior = behavior;
+			argc--;
+			continue;
+
 		} else if (verity_is_fec_opt_arg(arg_name)) {
 			r = verity_fec_parse_opt_args(as, v, &argc, arg_name);
 			if (r)
@@ -939,6 +1074,132 @@
 	return r;
 }
 
+/*
+ * Look up @devname with dm_get_device(), using the table's mode, and store
+ * the result in @dm_dev.  When the dev_wait module parameter is set the
+ * lookup is retried every 500 ms for as long as driver_probe_done() is
+ * non-zero or *dm_dev is still NULL.
+ *
+ * Returns 0 on success, otherwise the error of the last dm_get_device()
+ * attempt (rather than a bare -1, which callers would pass on as -EPERM).
+ */
+static int verity_get_device(struct dm_target *ti, const char *devname,
+			     struct dm_dev **dm_dev)
+{
+	int err;
+
+	do {
+		/* Try the normal path first since if everything is ready, it
+		 * will be the fastest.
+		 */
+		err = dm_get_device(ti, devname,
+				    dm_table_get_mode(ti->table), dm_dev);
+		if (!err)
+			return 0;
+
+		if (!dev_wait)
+			break;
+
+		/* No need to be too aggressive since this is a slow path. */
+		msleep(500);
+	} while (dev_wait && (driver_probe_done() != 0 || *dm_dev == NULL));
+
+	return err;
+}
+
+/*
+ * Split "key=value" in place at the first '='.
+ * *key is set to @arg itself (NULL when @arg is NULL); *val is set to the
+ * text following the '=' or to NULL when @arg contains no '='.
+ */
+static void splitarg(char *arg, char **key, char **val)
+{
+	char *eq = arg ? strchr(arg, '=') : NULL;
+
+	*key = arg;
+	if (eq) {
+		*eq = '\0';
+		*val = eq + 1;
+	} else {
+		*val = NULL;
+	}
+}
+
+/*
+ * Convert Chrome OS "key=value" verity arguments into the standard
+ * positional dm-verity arguments:
+ *
+ *   [0] version            [1] data device        [2] hash device
+ *   [3] data block size    [4] hash block size    [5] number of data blocks
+ *   [6] hash start block   [7] algorithm          [8] root digest
+ *   [9] salt               [10..12] optional "2 error_behavior <n>"
+ *
+ * Each argv[] string is split in place at its '=' and the new vector points
+ * into those strings; unknown keys are ignored.
+ *
+ * On success *pargc and *pargv are replaced by the new vector, which must be
+ * released with free_chromeos_argv(), and NULL is returned.  On failure an
+ * error string is returned and *pargc / *pargv are left untouched.
+ */
+static char *chromeos_args(unsigned *pargc, char ***pargv)
+{
+	const size_t hashstart_len = 24;	/* room for any unsigned long */
+	char *hashstart = NULL;
+	char **argv = *pargv;
+	int argc = *pargc;
+	char *key, *val;
+	int nargc = 10;
+	char **nargv;
+	char *errstr;
+	int i;
+
+	nargv = kcalloc(14, sizeof(char *), GFP_KERNEL);
+	if (!nargv)
+		return "Failed to allocate memory";
+
+	nargv[0] = "0";		/* version */
+	nargv[3] = "4096";	/* data block size */
+	nargv[4] = "4096";	/* hash block size */
+	nargv[9] = "-";		/* salt (optional) */
+
+	for (i = 0; i < argc; ++i) {
+		DMDEBUG("Argument %d: '%s'", i, argv[i]);
+		splitarg(argv[i], &key, &val);
+		if (!key) {
+			DMWARN("Bad argument %d: missing key?", i);
+			errstr = "Bad argument: missing key";
+			goto err;
+		}
+		if (!val) {
+			DMWARN("Bad argument %d='%s': missing value", i, key);
+			errstr = "Bad argument: missing value";
+			goto err;
+		}
+		if (!strcmp(key, "alg")) {
+			nargv[7] = val;
+		} else if (!strcmp(key, "payload")) {
+			nargv[1] = val;
+		} else if (!strcmp(key, "hashtree")) {
+			nargv[2] = val;
+		} else if (!strcmp(key, "root_hexdigest")) {
+			nargv[8] = val;
+		} else if (!strcmp(key, "hashstart")) {
+			unsigned long num;
+
+			if (kstrtoul(val, 10, &num)) {
+				errstr = "Invalid hashstart";
+				goto err;
+			}
+			/* Scale down to 4096-byte (1 << 12) units; the input
+			 * is presumably a sector count.
+			 */
+			num >>= (12 - SECTOR_SHIFT);
+			/* Drop the buffer of an earlier hashstart= key */
+			kfree(hashstart);
+			hashstart = kmalloc(hashstart_len, GFP_KERNEL);
+			if (!hashstart) {
+				errstr = "Failed to allocate memory";
+				goto err;
+			}
+			/* Size by the allocation: sizeof(hashstart) would be
+			 * the size of the pointer and truncate the number.
+			 */
+			scnprintf(hashstart, hashstart_len, "%lu", num);
+			nargv[5] = hashstart;
+			nargv[6] = hashstart;
+		} else if (!strcmp(key, "salt")) {
+			nargv[9] = val;
+		} else if (!strcmp(key, DM_VERITY_OPT_ERROR_BEHAVIOR)) {
+			char *behavior = verity_parse_error_behavior(val);
+
+			if (IS_ERR(behavior)) {
+				errstr = "Invalid error behavior";
+				goto err;
+			}
+			nargv[10] = "2";
+			nargv[11] = key;
+			nargv[12] = behavior;
+			nargc = 13;
+		}
+	}
+
+	if (!nargv[1] || !nargv[2] || !nargv[5] || !nargv[7] || !nargv[8]) {
+		errstr = "Missing argument";
+		goto err;
+	}
+
+	*pargc = nargc;
+	*pargv = nargv;
+	return NULL;
+
+err:
+	kfree(nargv);
+	kfree(hashstart);
+	return errstr;
+}
+
+/*
+ * Release a vector produced by the Chrome OS parameter conversion.
+ * Frees the string in slot 5 and then the vector itself; a NULL @argv is
+ * accepted and ignored.
+ */
+static void free_chromeos_argv(char **argv)
+{
+	if (!argv)
+		return;
+
+	kfree(argv[5]);
+	kfree(argv);
+}
+
 /*
  * Target parameters:
  *	<version>	The current format is version 1.
@@ -965,10 +1226,19 @@
 	sector_t hash_position;
 	char dummy;
 	char *root_hash_digest_to_validate;
+	char **chromeos_argv = NULL;
+
+	if (argc < 10) {
+		ti->error = chromeos_args(&argc, &argv);
+		if (ti->error)
+			return -EINVAL;
+		chromeos_argv = argv;
+	}
 
 	v = kzalloc(sizeof(struct dm_verity), GFP_KERNEL);
 	if (!v) {
 		ti->error = "Cannot allocate verity structure";
+		free_chromeos_argv(chromeos_argv);
 		return -ENOMEM;
 	}
 	ti->private = v;
@@ -998,13 +1268,13 @@
 	}
 	v->version = num;
 
-	r = dm_get_device(ti, argv[1], FMODE_READ, &v->data_dev);
+	r = verity_get_device(ti, argv[1], &v->data_dev);
 	if (r) {
 		ti->error = "Data device lookup failed";
 		goto bad;
 	}
 
-	r = dm_get_device(ti, argv[2], FMODE_READ, &v->hash_dev);
+	r = verity_get_device(ti, argv[2], &v->hash_dev);
 	if (r) {
 		ti->error = "Hash device lookup failed";
 		goto bad;
@@ -1204,14 +1474,14 @@
 				       __alignof__(struct dm_verity_io));
 
 	verity_verify_sig_opts_cleanup(&verify_args);
-
+	free_chromeos_argv(chromeos_argv);
 	return 0;
 
 bad:
 
 	verity_verify_sig_opts_cleanup(&verify_args);
 	verity_dtr(ti);
-
+	free_chromeos_argv(chromeos_argv);
 	return r;
 }
 

diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index 641b9e3..f453caf 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h

@@ -14,6 +14,7 @@
 #include <linux/dm-bufio.h>
 #include <linux/device-mapper.h>
 #include <crypto/hash.h>
+#include <linux/notifier.h>
 
 #define DM_VERITY_MAX_LEVELS		63
 
@@ -55,6 +56,7 @@
 	int hash_failed;	/* set to 1 if hash of any block failed */
 	enum verity_mode mode;	/* mode for handling verification errors */
 	unsigned corrupted_errs;/* Number of errors for corrupted blocks */
+	int error_behavior;	/* selects error behavior on io errors */
 
 	struct workqueue_struct *verify_wq;
 
@@ -92,6 +94,40 @@
 	 */
 };
 
+struct verity_result {
+	struct completion completion;	/* NOTE(review): no user of this struct is visible in this change */
+	int err;
+};
+
+struct dm_verity_error_state {
+	int code;	/* status of the failed request */
+	int transient;  /* Likely to not happen after a reboot */
+	u64 block;	/* verity_error() currently always passes ~0 */
+	const char *message;	/* "integrity" or "block" */
+
+	sector_t dev_start;	/* data_start of the verity target */
+	sector_t dev_len;	/* data_blocks of the verity target */
+	struct block_device *dev;	/* data device */
+
+	sector_t hash_dev_start;	/* hash_start of the verity target */
+	sector_t hash_dev_len;	/* hash_blocks of the verity target */
+	struct block_device *hash_dev;	/* hash device */
+
+	/* Final behavior after all notifications are completed. */
+	int behavior;	/* preset to DM_VERITY_ERROR_BEHAVIOR_PANIC before the chain runs */
+};
+
+/* This enum must be matched to allowed_error_behaviors in dm-verity-target.c */
+enum dm_verity_error_behavior {
+	DM_VERITY_ERROR_BEHAVIOR_EIO = 0,
+	DM_VERITY_ERROR_BEHAVIOR_PANIC,
+	DM_VERITY_ERROR_BEHAVIOR_NONE,
+	DM_VERITY_ERROR_BEHAVIOR_NOTIFY
+};
+
+int dm_verity_register_error_notifier(struct notifier_block *nb);
+int dm_verity_unregister_error_notifier(struct notifier_block *nb);
+
 static inline struct ahash_request *verity_io_hash_req(struct dm_verity *v,
 						     struct dm_verity_io *io)
 {

diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index ebc37e2..133c698 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h

@@ -27,6 +27,17 @@
 /* 1 for management, 1 for rx, 1 for tx */
 #define GVE_MIN_MSIX 3
 
+/* Numbers of gve tx/rx stats in stats report. */
+#define GVE_TX_STATS_REPORT_NUM	5
+#define GVE_RX_STATS_REPORT_NUM	2
+
+/* Numbers of NIC tx/rx stats in stats report. */
+#define NIC_TX_STATS_REPORT_NUM	0
+#define NIC_RX_STATS_REPORT_NUM	4
+
+/* Interval to schedule a service task, 20000ms. */
+#define GVE_SERVICE_TIMER_PERIOD	20000
+
 /* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
 struct gve_rx_desc_queue {
 	struct gve_rx_desc *desc_ring; /* the descriptor ring */
@@ -39,6 +50,8 @@
 	struct page *page;
 	void *page_address;
 	u32 page_offset; /* offset to write to in page */
+	int pagecnt_bias; /* expected pagecnt if only the driver has a ref */
+	bool can_flip; /* page can be flipped and reused */
 };
 
 /* A list of pages registered with the device during setup and used by a queue
@@ -57,6 +70,7 @@
 	dma_addr_t data_bus; /* dma mapping of the slots */
 	struct gve_rx_slot_page_info *page_info; /* page info of the buffers */
 	struct gve_queue_page_list *qpl; /* qpl assigned to this queue */
+	bool raw_addressing; /* use raw_addressing? */
 };
 
 struct gve_priv;
@@ -71,6 +85,13 @@
 	u32 cnt; /* free-running total number of completed packets */
 	u32 fill_cnt; /* free-running total number of descs and buffs posted */
 	u32 mask; /* masks the cnt and fill_cnt to the size of the ring */
+	u32 db_threshold; /* threshold for posting new buffs and descs */
+	u64 rx_copybreak_pkt; /* free-running count of copybreak packets */
+	u64 rx_copied_pkt; /* free-running total number of copied packets */
+	u64 rx_skb_alloc_fail; /* free-running count of skb alloc fails */
+	u64 rx_buf_alloc_fail; /* free-running count of buffer alloc fails */
+	u64 rx_desc_err_dropped_pkt; /* free-running count of packets dropped by descriptor error */
+	u64 rx_no_refill_dropped_pkt; /* free-running count of packets dropped because of lack of buffer refill */
 	u32 q_num; /* queue index */
 	u32 ntfy_id; /* notification block index */
 	struct gve_queue_resources *q_resources; /* head and tail pointer idx */
@@ -91,12 +112,20 @@
 	u32 iov_padding; /* padding associated with this segment */
 };
 
+struct gve_tx_dma_buf {
+	DEFINE_DMA_UNMAP_ADDR(dma);	/* DMA address kept for the later unmap */
+	DEFINE_DMA_UNMAP_LEN(len);	/* mapped length kept for the later unmap */
+};
+
 /* Tracks the memory in the fifo occupied by the skb. Mapped 1:1 to a desc
  * ring entry but only used for a pkt_desc not a seg_desc
  */
 struct gve_tx_buffer_state {
 	struct sk_buff *skb; /* skb for this pkt */
-	struct gve_tx_iovec iov[GVE_TX_MAX_IOVEC]; /* segments of this pkt */
+	union {
+		struct gve_tx_iovec iov[GVE_TX_MAX_IOVEC]; /* segments of this pkt */
+		struct gve_tx_dma_buf buf;
+	};
 };
 
 /* A TX buffer - each queue has one */
@@ -119,13 +148,16 @@
 	__be32 last_nic_done ____cacheline_aligned; /* NIC tail pointer */
 	u64 pkt_done; /* free-running - total packets completed */
 	u64 bytes_done; /* free-running - total bytes completed */
+	u32 dropped_pkt; /* free-running - total packets dropped */
 
 	/* Cacheline 2 -- Read-mostly fields */
 	union gve_tx_desc *desc ____cacheline_aligned;
 	struct gve_tx_buffer_state *info; /* Maps 1:1 to a desc */
 	struct netdev_queue *netdev_txq;
 	struct gve_queue_resources *q_resources; /* head and tail pointer idx */
+	struct device *dev;
 	u32 mask; /* masks req and done down to queue size */
+	bool raw_addressing; /* use raw_addressing? */
 
 	/* Slow-path fields */
 	u32 q_num ____cacheline_aligned; /* queue idx */
@@ -141,13 +173,13 @@
  * associated with that irq.
  */
 struct gve_notify_block {
-	__be32 irq_db_index; /* idx into Bar2 - set by device, must be 1st */
+	__be32 *irq_db_index; /* pointer to idx into Bar2 */
 	char name[IFNAMSIZ + 16]; /* name registered with the kernel */
 	struct napi_struct napi; /* kernel napi struct for this block */
 	struct gve_priv *priv;
 	struct gve_tx_ring *tx; /* tx rings on this block */
 	struct gve_rx_ring *rx; /* rx rings on this block */
-} ____cacheline_aligned;
+};
 
 /* Tracks allowed and current queue settings */
 struct gve_queue_config {
@@ -161,13 +193,18 @@
 	unsigned long *qpl_id_map; /* bitmap of used qpl ids */
 };
 
+struct gve_irq_db {
+	__be32 index;	/* idx into Bar2; gve_notify_block.irq_db_index points here */
+} ____cacheline_aligned;
+
 struct gve_priv {
 	struct net_device *dev;
 	struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */
 	struct gve_rx_ring *rx; /* array of rx_cfg.num_queues */
 	struct gve_queue_page_list *qpls; /* array of num qpls */
 	struct gve_notify_block *ntfy_blocks; /* array of num_ntfy_blks */
-	dma_addr_t ntfy_block_bus;
+	struct gve_irq_db *irq_db_indices; /* array of num_ntfy_blks */
+	dma_addr_t irq_db_indices_bus;
 	struct msix_entry *msix_vectors; /* array of num_ntfy_blks + 1 */
 	char mgmt_msix_name[IFNAMSIZ + 16];
 	u32 mgmt_msix_idx;
@@ -178,11 +215,12 @@
 	u16 tx_desc_cnt; /* num desc per ring */
 	u16 rx_desc_cnt; /* num desc per ring */
 	u16 tx_pages_per_qpl; /* tx buffer length */
-	u16 rx_pages_per_qpl; /* rx buffer length */
+	u16 rx_data_slot_cnt; /* rx buffer length */
 	u64 max_registered_pages;
 	u64 num_registered_pages; /* num pages registered with NIC */
 	u32 rx_copybreak; /* copy packets smaller than this */
 	u16 default_num_queues; /* default num queues to set up */
+	bool raw_addressing; /* true if this dev supports raw addressing */
 
 	struct gve_queue_config tx_cfg;
 	struct gve_queue_config rx_cfg;
@@ -202,24 +240,63 @@
 	dma_addr_t adminq_bus_addr;
 	u32 adminq_mask; /* masks prod_cnt to adminq size */
 	u32 adminq_prod_cnt; /* free-running count of AQ cmds executed */
+	u32 adminq_cmd_fail; /* free-running count of AQ cmds failed */
+	u32 adminq_timeouts; /* free-running count of AQ cmds timeouts */
+	/* free-running count of per AQ cmd executed */
+	u32 adminq_describe_device_cnt;
+	u32 adminq_cfg_device_resources_cnt;
+	u32 adminq_register_page_list_cnt;
+	u32 adminq_unregister_page_list_cnt;
+	u32 adminq_create_tx_queue_cnt;
+	u32 adminq_create_rx_queue_cnt;
+	u32 adminq_destroy_tx_queue_cnt;
+	u32 adminq_destroy_rx_queue_cnt;
+	u32 adminq_dcfg_device_resources_cnt;
+	u32 adminq_set_driver_parameter_cnt;
+	u32 adminq_report_stats_cnt;
 
+	/* Global stats */
+	u32 interface_up_cnt; /* count of times interface turned up */
+	u32 interface_down_cnt; /* count of times interface turned down */
+	u32 reset_cnt; /* count of reset */
+	u32 page_alloc_fail; /* count of page alloc fails */
+	u32 dma_mapping_error; /* count of dma mapping errors */
 	struct workqueue_struct *gve_wq;
 	struct work_struct service_task;
 	unsigned long service_task_flags;
 	unsigned long state_flags;
+
+	struct gve_stats_report *stats_report;
+	u64 stats_report_len;
+	dma_addr_t stats_report_bus; /* dma address for the stats report */
+	unsigned long ethtool_flags;
+
+	unsigned long service_timer_period;
+	struct timer_list service_timer;
+
+	/* Gvnic device link speed from hypervisor. */
+	u64 link_speed;
+
+	/* Gvnic device's dma mask, set during probe. */
+	u8 dma_mask;
 };
 
-enum gve_service_task_flags {
-	GVE_PRIV_FLAGS_DO_RESET			= BIT(1),
-	GVE_PRIV_FLAGS_RESET_IN_PROGRESS	= BIT(2),
-	GVE_PRIV_FLAGS_PROBE_IN_PROGRESS	= BIT(3),
+enum gve_service_task_flags_bit {
+	GVE_PRIV_FLAGS_DO_RESET			= 1,
+	GVE_PRIV_FLAGS_RESET_IN_PROGRESS	= 2,
+	GVE_PRIV_FLAGS_PROBE_IN_PROGRESS	= 3,
+	GVE_PRIV_FLAGS_DO_REPORT_STATS = 4,
 };
 
-enum gve_state_flags {
-	GVE_PRIV_FLAGS_ADMIN_QUEUE_OK		= BIT(1),
-	GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK	= BIT(2),
-	GVE_PRIV_FLAGS_DEVICE_RINGS_OK		= BIT(3),
-	GVE_PRIV_FLAGS_NAPI_ENABLED		= BIT(4),
+enum gve_state_flags_bit {
+	GVE_PRIV_FLAGS_ADMIN_QUEUE_OK		= 1,
+	GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK	= 2,
+	GVE_PRIV_FLAGS_DEVICE_RINGS_OK		= 3,
+	GVE_PRIV_FLAGS_NAPI_ENABLED		= 4,
+};
+
+enum gve_ethtool_flags_bit {
+	GVE_PRIV_FLAGS_REPORT_STATS		= 0,
 };
 
 static inline bool gve_get_do_reset(struct gve_priv *priv)
@@ -269,6 +346,22 @@
 	clear_bit(GVE_PRIV_FLAGS_PROBE_IN_PROGRESS, &priv->service_task_flags);
 }
 
+static inline bool gve_get_do_report_stats(struct gve_priv *priv)
+{
+	return test_bit(GVE_PRIV_FLAGS_DO_REPORT_STATS,
+			&priv->service_task_flags);
+}
+
+static inline void gve_set_do_report_stats(struct gve_priv *priv)
+{
+	set_bit(GVE_PRIV_FLAGS_DO_REPORT_STATS, &priv->service_task_flags);
+}
+
+static inline void gve_clear_do_report_stats(struct gve_priv *priv)
+{
+	clear_bit(GVE_PRIV_FLAGS_DO_REPORT_STATS, &priv->service_task_flags);
+}
+
 static inline bool gve_get_admin_queue_ok(struct gve_priv *priv)
 {
 	return test_bit(GVE_PRIV_FLAGS_ADMIN_QUEUE_OK, &priv->state_flags);
@@ -329,12 +422,27 @@
 	clear_bit(GVE_PRIV_FLAGS_NAPI_ENABLED, &priv->state_flags);
 }
 
+static inline bool gve_get_report_stats(struct gve_priv *priv)
+{
+	return test_bit(GVE_PRIV_FLAGS_REPORT_STATS, &priv->ethtool_flags);
+}
+
+static inline void gve_set_report_stats(struct gve_priv *priv)
+{
+	set_bit(GVE_PRIV_FLAGS_REPORT_STATS, &priv->ethtool_flags);
+}
+
+static inline void gve_clear_report_stats(struct gve_priv *priv)
+{
+	clear_bit(GVE_PRIV_FLAGS_REPORT_STATS, &priv->ethtool_flags);
+}
+
 /* Returns the address of the ntfy_blocks irq doorbell
  */
 static inline __be32 __iomem *gve_irq_doorbell(struct gve_priv *priv,
 					       struct gve_notify_block *block)
 {
-	return &priv->db_bar2[be32_to_cpu(block->irq_db_index)];
+	return &priv->db_bar2[be32_to_cpu(*block->irq_db_index)];
 }
 
 /* Returns the index into ntfy_blocks of the given tx ring's block
@@ -355,14 +463,22 @@
  */
 static inline u32 gve_num_tx_qpls(struct gve_priv *priv)
 {
-	return priv->tx_cfg.num_queues;
+	if (priv->raw_addressing) {
+		return 0;
+	} else {
+		return priv->tx_cfg.num_queues;
+	}
 }
 
 /* Returns the number of rx queue page lists
  */
 static inline u32 gve_num_rx_qpls(struct gve_priv *priv)
 {
-	return priv->rx_cfg.num_queues;
+	if (priv->raw_addressing) {
+		return 0;
+	} else {
+		return priv->rx_cfg.num_queues;
+	}
 }
 
 /* Returns a pointer to the next available tx qpl in the list of qpls
@@ -416,18 +532,10 @@
 		return DMA_FROM_DEVICE;
 }
 
-/* Returns true if the max mtu allows page recycling */
-static inline bool gve_can_recycle_pages(struct net_device *dev)
-{
-	/* We can't recycle the pages if we can't fit a packet into half a
-	 * page.
-	 */
-	return dev->max_mtu <= PAGE_SIZE / 2;
-}
-
 /* buffers */
-int gve_alloc_page(struct device *dev, struct page **page, dma_addr_t *dma,
-		   enum dma_data_direction);
+int gve_alloc_page(struct gve_priv *priv, struct device *dev,
+		   struct page **page, dma_addr_t *dma,
+		   enum dma_data_direction, gfp_t gfp_flags);
 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
 		   enum dma_data_direction);
 /* tx handling */
@@ -450,6 +558,8 @@
 int gve_adjust_queues(struct gve_priv *priv,
 		      struct gve_queue_config new_rx_config,
 		      struct gve_queue_config new_tx_config);
+/* report stats handling */
+void gve_handle_report_stats(struct gve_priv *priv);
 /* exported by ethtool.c */
 extern const struct ethtool_ops gve_ethtool_ops;
 /* needed by ethtool */

diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index c3ba7ba..b02336b 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c

@@ -23,6 +23,19 @@
 
 	priv->adminq_mask = (PAGE_SIZE / sizeof(union gve_adminq_command)) - 1;
 	priv->adminq_prod_cnt = 0;
+	priv->adminq_cmd_fail = 0;
+	priv->adminq_timeouts = 0;
+	priv->adminq_describe_device_cnt = 0;
+	priv->adminq_cfg_device_resources_cnt = 0;
+	priv->adminq_register_page_list_cnt = 0;
+	priv->adminq_unregister_page_list_cnt = 0;
+	priv->adminq_create_tx_queue_cnt = 0;
+	priv->adminq_create_rx_queue_cnt = 0;
+	priv->adminq_destroy_tx_queue_cnt = 0;
+	priv->adminq_destroy_rx_queue_cnt = 0;
+	priv->adminq_dcfg_device_resources_cnt = 0;
+	priv->adminq_set_driver_parameter_cnt = 0;
+	priv->adminq_report_stats_cnt = 0;
 
 	/* Setup Admin queue with the device */
 	iowrite32be(priv->adminq_bus_addr / PAGE_SIZE,
@@ -81,17 +94,18 @@
 	return false;
 }
 
-static int gve_adminq_parse_err(struct device *dev, u32 status)
+static int gve_adminq_parse_err(struct gve_priv *priv, u32 status)
 {
 	if (status != GVE_ADMINQ_COMMAND_PASSED &&
-	    status != GVE_ADMINQ_COMMAND_UNSET)
-		dev_err(dev, "AQ command failed with status %d\n", status);
-
+	    status != GVE_ADMINQ_COMMAND_UNSET) {
+		dev_err(&priv->pdev->dev, "AQ command failed with status %d\n", status);
+		priv->adminq_cmd_fail++;
+	}
 	switch (status) {
 	case GVE_ADMINQ_COMMAND_PASSED:
 		return 0;
 	case GVE_ADMINQ_COMMAND_UNSET:
-		dev_err(dev, "parse_aq_err: err and status both unset, this should not be possible.\n");
+		dev_err(&priv->pdev->dev, "parse_aq_err: err and status both unset, this should not be possible.\n");
 		return -EINVAL;
 	case GVE_ADMINQ_COMMAND_ERROR_ABORTED:
 	case GVE_ADMINQ_COMMAND_ERROR_CANCELLED:
@@ -116,36 +130,143 @@
 	case GVE_ADMINQ_COMMAND_ERROR_UNIMPLEMENTED:
 		return -ENOTSUPP;
 	default:
-		dev_err(dev, "parse_aq_err: unknown status code %d\n", status);
+		dev_err(&priv->pdev->dev, "parse_aq_err: unknown status code %d\n", status);
 		return -EINVAL;
 	}
 }
 
+/* Kicks all queued AQ commands and waits for them; returns -ENOTRECOVERABLE on
+ * timeout, else the first error parsed from their statuses (0 if none failed).
+ */
+static int gve_adminq_kick_and_wait(struct gve_priv *priv)
+{
+	u32 tail, head;
+	int i; /* NOTE(review): signed index over u32 counters - confirm wrap */
+
+	tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
+	head = priv->adminq_prod_cnt; /* commands issued by the driver so far */
+
+	gve_adminq_kick_cmd(priv, head);
+	if (!gve_adminq_wait_for_cmd(priv, head)) {
+		dev_err(&priv->pdev->dev, "AQ commands timed out, need to reset AQ\n");
+		priv->adminq_timeouts++;
+		return -ENOTRECOVERABLE;
+	}
+
+	for (i = tail; i < head; i++) { /* every command in [tail, head) */
+		union gve_adminq_command *cmd;
+		u32 status, err; /* err carries the int from gve_adminq_parse_err() */
+
+		cmd = &priv->adminq[i & priv->adminq_mask]; /* counter -> ring slot */
+		status = be32_to_cpu(READ_ONCE(cmd->status));
+		err = gve_adminq_parse_err(priv, status);
+		if (err)
+			// Return the first error if we failed.
+			return err;
+	}
+
+	return 0;
+}
+
 /* This function is not threadsafe - the caller is responsible for any
  * necessary locks.
  */
-int gve_adminq_execute_cmd(struct gve_priv *priv,
-			   union gve_adminq_command *cmd_orig)
+static int gve_adminq_issue_cmd(struct gve_priv *priv,
+				union gve_adminq_command *cmd_orig)
 {
 	union gve_adminq_command *cmd;
-	u32 status = 0;
-	u32 prod_cnt;
+	u32 tail, opcode;
+
+	/* Compare slot indices: mask tail the same way as the producer count. */
+	tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
+	if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) ==
+	    (tail & priv->adminq_mask)) {
+		int err;
+
+		/* Ring is full: flush the queued commands to make room. */
+		err = gve_adminq_kick_and_wait(priv);
+		if (err)
+			return err;
+		/* Re-read the event counter and check for space again. */
+		tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
+		if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) ==
+		    (tail & priv->adminq_mask)) {
+			/* Unexpected: the flush should have freed a slot. */
+			return -ENOMEM;
+		}
+	}
 
 	cmd = &priv->adminq[priv->adminq_prod_cnt & priv->adminq_mask];
 	priv->adminq_prod_cnt++;
-	prod_cnt = priv->adminq_prod_cnt;
 
 	memcpy(cmd, cmd_orig, sizeof(*cmd_orig));
+	opcode = be32_to_cpu(READ_ONCE(cmd->opcode)); /* picks the counter below */
 
-	gve_adminq_kick_cmd(priv, prod_cnt);
-	if (!gve_adminq_wait_for_cmd(priv, prod_cnt)) {
-		dev_err(&priv->pdev->dev, "AQ command timed out, need to reset AQ\n");
-		return -ENOTRECOVERABLE;
+	switch (opcode) {
+	case GVE_ADMINQ_DESCRIBE_DEVICE:
+		priv->adminq_describe_device_cnt++;
+		break;
+	case GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES:
+		priv->adminq_cfg_device_resources_cnt++;
+		break;
+	case GVE_ADMINQ_REGISTER_PAGE_LIST:
+		priv->adminq_register_page_list_cnt++;
+		break;
+	case GVE_ADMINQ_UNREGISTER_PAGE_LIST:
+		priv->adminq_unregister_page_list_cnt++;
+		break;
+	case GVE_ADMINQ_CREATE_TX_QUEUE:
+		priv->adminq_create_tx_queue_cnt++;
+		break;
+	case GVE_ADMINQ_CREATE_RX_QUEUE:
+		priv->adminq_create_rx_queue_cnt++;
+		break;
+	case GVE_ADMINQ_DESTROY_TX_QUEUE:
+		priv->adminq_destroy_tx_queue_cnt++;
+		break;
+	case GVE_ADMINQ_DESTROY_RX_QUEUE:
+		priv->adminq_destroy_rx_queue_cnt++;
+		break;
+	case GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES:
+		priv->adminq_dcfg_device_resources_cnt++;
+		break;
+	case GVE_ADMINQ_SET_DRIVER_PARAMETER:
+		priv->adminq_set_driver_parameter_cnt++;
+		break;
+	case GVE_ADMINQ_REPORT_STATS:
+		priv->adminq_report_stats_cnt++;
+		break;
+	case GVE_ADMINQ_REPORT_LINK_SPEED:
+		break; /* valid opcode, but gve_priv has no counter for it */
+	default:
+		dev_err(&priv->pdev->dev, "unknown AQ command opcode %d\n", opcode);
 	}
 
-	memcpy(cmd_orig, cmd, sizeof(*cmd));
-	status = be32_to_cpu(READ_ONCE(cmd->status));
-	return gve_adminq_parse_err(&priv->pdev->dev, status);
+	return 0; /* command is only queued here; callers kick and wait */
+}
+
+/* Issues a single AQ command and waits for it to complete.
+ * Not threadsafe - the caller is responsible for any necessary locks.
+ * No other commands may be waiting to be executed; if the device event
+ * counter differs from the driver's producer count, -EINVAL is returned.
+ */
+static int gve_adminq_execute_cmd(struct gve_priv *priv,
+			   union gve_adminq_command *cmd_orig)
+{
+	u32 tail, head;
+	int err;
+
+	tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
+	head = priv->adminq_prod_cnt;
+	if (tail != head)
+		// Commands are still outstanding, which the caller must prevent.
+		return -EINVAL;
+
+	err = gve_adminq_issue_cmd(priv, cmd_orig);
+	if (err)
+		return err;
+
+	return gve_adminq_kick_and_wait(priv);
+}
 
 /* The device specifies that the management vector can either be the first irq
@@ -172,7 +293,7 @@
 		.num_counters = cpu_to_be32(num_counters),
 		.irq_db_addr = cpu_to_be64(db_array_bus_addr),
 		.num_irq_dbs = cpu_to_be32(num_ntfy_blks),
-		.irq_db_stride = cpu_to_be32(sizeof(priv->ntfy_blocks[0])),
+		.irq_db_stride = cpu_to_be32(sizeof(*priv->irq_db_indices)),
 		.ntfy_blk_msix_base_idx =
 					cpu_to_be32(GVE_NTFY_BLK_BASE_MSIX_IDX),
 	};
@@ -190,80 +311,120 @@
 	return gve_adminq_execute_cmd(priv, &cmd);
 }
 
-int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
+int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 num_queues)
 {
-	struct gve_tx_ring *tx = &priv->tx[queue_index];
 	union gve_adminq_command cmd;
+	struct gve_tx_ring *tx;
+	u32 qpl_id;
+	int err;
+	int i;
 
-	memset(&cmd, 0, sizeof(cmd));
-	cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_TX_QUEUE);
-	cmd.create_tx_queue = (struct gve_adminq_create_tx_queue) {
-		.queue_id = cpu_to_be32(queue_index),
-		.reserved = 0,
-		.queue_resources_addr = cpu_to_be64(tx->q_resources_bus),
-		.tx_ring_addr = cpu_to_be64(tx->bus),
-		.queue_page_list_id = cpu_to_be32(tx->tx_fifo.qpl->id),
-		.ntfy_id = cpu_to_be32(tx->ntfy_id),
-	};
+	for (i = 0; i < num_queues; i++) {
+		tx = &priv->tx[i];
+		qpl_id = priv->raw_addressing ? GVE_RAW_ADDRESSING_QPL_ID :
+			 tx->tx_fifo.qpl->id;
+		memset(&cmd, 0, sizeof(cmd));
+		cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_TX_QUEUE);
+		cmd.create_tx_queue = (struct gve_adminq_create_tx_queue) {
+			.queue_id = cpu_to_be32(i),
+			.reserved = 0,
+			.queue_resources_addr =
+				cpu_to_be64(tx->q_resources_bus),
+			.tx_ring_addr = cpu_to_be64(tx->bus),
+			.queue_page_list_id = cpu_to_be32(qpl_id),
+			.ntfy_id = cpu_to_be32(tx->ntfy_id),
+		};
+		err = gve_adminq_issue_cmd(priv, &cmd);
+		if (err)
+			return err;
+	}
 
-	return gve_adminq_execute_cmd(priv, &cmd);
+	return gve_adminq_kick_and_wait(priv);
 }
 
-int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
+int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues)
 {
-	struct gve_rx_ring *rx = &priv->rx[queue_index];
 	union gve_adminq_command cmd;
+	struct gve_rx_ring *rx;
+	u32 qpl_id;
+	int err;
+	int i;
 
-	memset(&cmd, 0, sizeof(cmd));
-	cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE);
-	cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) {
-		.queue_id = cpu_to_be32(queue_index),
-		.index = cpu_to_be32(queue_index),
-		.reserved = 0,
-		.ntfy_id = cpu_to_be32(rx->ntfy_id),
-		.queue_resources_addr = cpu_to_be64(rx->q_resources_bus),
-		.rx_desc_ring_addr = cpu_to_be64(rx->desc.bus),
-		.rx_data_ring_addr = cpu_to_be64(rx->data.data_bus),
-		.queue_page_list_id = cpu_to_be32(rx->data.qpl->id),
-	};
+	for (i = 0; i < num_queues; i++) {
+		rx = &priv->rx[i];
+		qpl_id = priv->raw_addressing ? GVE_RAW_ADDRESSING_QPL_ID :
+			 rx->data.qpl->id;
+		memset(&cmd, 0, sizeof(cmd));
+		cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE);
+		cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) {
+			.queue_id = cpu_to_be32(i),
+			.index = cpu_to_be32(i),
+			.reserved = 0,
+			.ntfy_id = cpu_to_be32(rx->ntfy_id),
+			.queue_resources_addr = cpu_to_be64(rx->q_resources_bus),
+			.rx_desc_ring_addr = cpu_to_be64(rx->desc.bus),
+			.rx_data_ring_addr = cpu_to_be64(rx->data.data_bus),
+			.queue_page_list_id = cpu_to_be32(qpl_id),
+		};
+		err = gve_adminq_issue_cmd(priv, &cmd);
+		if (err)
+			return err;
+	}
 
-	return gve_adminq_execute_cmd(priv, &cmd);
+	return gve_adminq_kick_and_wait(priv);
 }
 
-int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_index)
+int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 num_queues)
 {
 	union gve_adminq_command cmd;
+	int err;
+	int i;
 
-	memset(&cmd, 0, sizeof(cmd));
-	cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_TX_QUEUE);
-	cmd.destroy_tx_queue = (struct gve_adminq_destroy_tx_queue) {
-		.queue_id = cpu_to_be32(queue_index),
-	};
+	for (i = 0; i < num_queues; i++) {
+		memset(&cmd, 0, sizeof(cmd));
+		cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_TX_QUEUE);
+		cmd.destroy_tx_queue = (struct gve_adminq_destroy_tx_queue) {
+			.queue_id = cpu_to_be32(i),
+		};
+		err = gve_adminq_issue_cmd(priv, &cmd);
+		if (err)
+			return err;
+	}
 
-	return gve_adminq_execute_cmd(priv, &cmd);
+	return gve_adminq_kick_and_wait(priv);
 }
 
-int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_index)
+int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues)
 {
 	union gve_adminq_command cmd;
+	int err;
+	int i;
 
-	memset(&cmd, 0, sizeof(cmd));
-	cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE);
-	cmd.destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) {
-		.queue_id = cpu_to_be32(queue_index),
-	};
+	for (i = 0; i < num_queues; i++) {
+		memset(&cmd, 0, sizeof(cmd));
+		cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE);
+		cmd.destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) {
+			.queue_id = cpu_to_be32(i),
+		};
+		err = gve_adminq_issue_cmd(priv, &cmd);
+		if (err)
+			return err;
+	}
 
-	return gve_adminq_execute_cmd(priv, &cmd);
+	return gve_adminq_kick_and_wait(priv);
 }
 
 int gve_adminq_describe_device(struct gve_priv *priv)
 {
 	struct gve_device_descriptor *descriptor;
+	struct gve_device_option *dev_opt;
 	union gve_adminq_command cmd;
 	dma_addr_t descriptor_bus;
+	u16 num_options;
 	int err = 0;
 	u8 *mac;
 	u16 mtu;
+	int i;
 
 	memset(&cmd, 0, sizeof(cmd));
 	descriptor = dma_alloc_coherent(&priv->pdev->dev, PAGE_SIZE,
@@ -283,8 +444,8 @@
 
 	priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
 	if (priv->tx_desc_cnt * sizeof(priv->tx->desc[0]) < PAGE_SIZE) {
-		netif_err(priv, drv, priv->dev, "Tx desc count %d too low\n",
-			  priv->tx_desc_cnt);
+		dev_err(&priv->pdev->dev, "Tx desc count %d too low\n",
+						priv->tx_desc_cnt);
 		err = -EINVAL;
 		goto free_device_descriptor;
 	}
@@ -293,7 +454,7 @@
 	    < PAGE_SIZE ||
 	    priv->rx_desc_cnt * sizeof(priv->rx->data.data_ring[0])
 	    < PAGE_SIZE) {
-		netif_err(priv, drv, priv->dev, "Rx desc count %d too low\n",
+		dev_err(&priv->pdev->dev, "Rx desc count %d too low\n",
 			  priv->rx_desc_cnt);
 		err = -EINVAL;
 		goto free_device_descriptor;
@@ -302,7 +463,7 @@
 				be64_to_cpu(descriptor->max_registered_pages);
 	mtu = be16_to_cpu(descriptor->mtu);
 	if (mtu < ETH_MIN_MTU) {
-		netif_err(priv, drv, priv->dev, "MTU %d below minimum MTU\n",
+		dev_err(&priv->pdev->dev, "MTU %d below minimum MTU\n",
 			  mtu);
 		err = -EINVAL;
 		goto free_device_descriptor;
@@ -311,18 +472,64 @@
 	priv->num_event_counters = be16_to_cpu(descriptor->counters);
 	ether_addr_copy(priv->dev->dev_addr, descriptor->mac);
 	mac = descriptor->mac;
-	netif_info(priv, drv, priv->dev, "MAC addr: %pM\n", mac);
+	dev_info(&priv->pdev->dev, "MAC addr: %pM\n", mac);
 	priv->tx_pages_per_qpl = be16_to_cpu(descriptor->tx_pages_per_qpl);
-	priv->rx_pages_per_qpl = be16_to_cpu(descriptor->rx_pages_per_qpl);
-	if (priv->rx_pages_per_qpl < priv->rx_desc_cnt) {
-		netif_err(priv, drv, priv->dev, "rx_pages_per_qpl cannot be smaller than rx_desc_cnt, setting rx_desc_cnt down to %d.\n",
-			  priv->rx_pages_per_qpl);
-		priv->rx_desc_cnt = priv->rx_pages_per_qpl;
+	priv->rx_data_slot_cnt = be16_to_cpu(descriptor->rx_pages_per_qpl);
+	if (priv->rx_data_slot_cnt < priv->rx_desc_cnt) {
+		dev_err(&priv->pdev->dev, "rx_data_slot_cnt cannot be smaller than rx_desc_cnt, setting rx_desc_cnt down to %d.\n",
+			  priv->rx_data_slot_cnt);
+		priv->rx_desc_cnt = priv->rx_data_slot_cnt;
 	}
 	priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues);
+	dev_opt = (struct gve_device_option *)((void *)descriptor +
+							sizeof(*descriptor));
+
+	num_options = be16_to_cpu(descriptor->num_device_options);
+	for (i = 0; i < num_options; i++) {
+		u16 option_id;
+		u16 option_length;
+
+		if ((void *)dev_opt + sizeof(*dev_opt)  > (void *)descriptor +
+				      be16_to_cpu(descriptor->total_length)) {
+			dev_err(&priv->dev->dev,
+				  "num_options in device_descriptor does not match total length.\n");
+			err = -EINVAL;
+			goto free_device_descriptor;
+		}
+
+		option_id = be16_to_cpu(dev_opt->option_id);
+		option_length = be16_to_cpu(dev_opt->option_length);
+		switch(option_id) {
+		case GVE_DEV_OPT_ID_RAW_ADDRESSING:
+			/* If the length or feature mask doesn't match,
+			 * continue without enabling the feature.
+			 */
+			if (option_length != GVE_DEV_OPT_LEN_RAW_ADDRESSING ||
+			    be32_to_cpu(dev_opt->feat_mask) !=
+			    GVE_DEV_OPT_FEAT_MASK_RAW_ADDRESSING) {
+				dev_info(&priv->pdev->dev,
+					   "Raw addressing device option not enabled, length or features mask did not match expected.\n");
+				priv->raw_addressing = false;
+			} else {
+				dev_info(&priv->pdev->dev,
+					   "Raw addressing device option enabled.\n");
+				priv->raw_addressing = true;
+			}
+			break;
+		default:
+			/* If we don't recognize the option just continue
+			 * without doing anything.
+			 */
+			dev_info(&priv->pdev->dev,
+				   "Unrecognized device option 0x%hx not enabled.\n",
+				   option_id);
+			break;
+		}
+		dev_opt = (void *)dev_opt + sizeof(*dev_opt) + option_length;
+	}
 
 free_device_descriptor:
-	dma_free_coherent(&priv->pdev->dev, sizeof(*descriptor), descriptor,
+	dma_free_coherent(&priv->pdev->dev, PAGE_SIZE, descriptor,
 			  descriptor_bus);
 	return err;
 }
@@ -385,3 +592,45 @@
 
 	return gve_adminq_execute_cmd(priv, &cmd);
 }
+
+int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len,
+			    dma_addr_t stats_report_addr, u64 interval)
+{
+	union gve_adminq_command cmd;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.opcode = cpu_to_be32(GVE_ADMINQ_REPORT_STATS);
+	cmd.report_stats = (struct gve_adminq_report_stats) {
+		.stats_report_len = cpu_to_be64(stats_report_len),
+		.stats_report_addr = cpu_to_be64(stats_report_addr),
+		.interval = cpu_to_be64(interval),
+	};
+
+	return gve_adminq_execute_cmd(priv, &cmd);
+}
+
+/* Fetches the device link speed over the AQ and caches it in priv->link_speed. */
+int gve_adminq_report_link_speed(struct gve_priv *priv)
+{
+	union gve_adminq_command gvnic_cmd;
+	dma_addr_t link_speed_region_bus;
+	__be64 *link_speed_region;
+	int err;
+
+	link_speed_region = dma_alloc_coherent(&priv->pdev->dev,
+		sizeof(*link_speed_region), &link_speed_region_bus, GFP_KERNEL);
+	if (!link_speed_region)
+		return -ENOMEM;
+
+	memset(&gvnic_cmd, 0, sizeof(gvnic_cmd));
+	gvnic_cmd.opcode = cpu_to_be32(GVE_ADMINQ_REPORT_LINK_SPEED);
+	gvnic_cmd.report_link_speed.link_speed_address =
+		cpu_to_be64(link_speed_region_bus);
+
+	err = gve_adminq_execute_cmd(priv, &gvnic_cmd);
+	/* NOTE(review): link_speed is updated even when err != 0 - confirm. */
+	priv->link_speed = be64_to_cpu(*link_speed_region);
+	dma_free_coherent(&priv->pdev->dev, sizeof(*link_speed_region),
+			  link_speed_region, link_speed_region_bus);
+	return err;
+}

diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
index 4dfa06e..c2fc677 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.h
+++ b/drivers/net/ethernet/google/gve/gve_adminq.h

@@ -21,6 +21,8 @@
 	GVE_ADMINQ_DESTROY_RX_QUEUE		= 0x8,
 	GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES	= 0x9,
 	GVE_ADMINQ_SET_DRIVER_PARAMETER		= 0xB,
+	GVE_ADMINQ_REPORT_STATS			= 0xC,
+	GVE_ADMINQ_REPORT_LINK_SPEED	= 0xD
 };
 
 /* Admin queue status codes */
@@ -77,12 +79,17 @@
 
 static_assert(sizeof(struct gve_device_descriptor) == 40);
 
-struct device_option {
-	__be32 option_id;
-	__be32 option_length;
+struct gve_device_option {
+	__be16 option_id;
+	__be16 option_length;
+	__be32 feat_mask;
 };
 
-static_assert(sizeof(struct device_option) == 8);
+static_assert(sizeof(struct gve_device_option) == 8);
+
+#define GVE_DEV_OPT_ID_RAW_ADDRESSING 0x1
+#define GVE_DEV_OPT_LEN_RAW_ADDRESSING 0x0
+#define GVE_DEV_OPT_FEAT_MASK_RAW_ADDRESSING 0x0
 
 struct gve_adminq_configure_device_resources {
 	__be64 counter_array;
@@ -109,6 +116,8 @@
 
 static_assert(sizeof(struct gve_adminq_unregister_page_list) == 4);
 
+#define GVE_RAW_ADDRESSING_QPL_ID 0xFFFFFFFF
+
 struct gve_adminq_create_tx_queue {
 	__be32 queue_id;
 	__be32 reserved;
@@ -172,6 +181,51 @@
 
 static_assert(sizeof(struct gve_adminq_set_driver_parameter) == 16);
 
+struct gve_adminq_report_stats {
+	__be64 stats_report_len;
+	__be64 stats_report_addr;
+	__be64 interval;
+};
+
+static_assert(sizeof(struct gve_adminq_report_stats) == 24);
+
+struct gve_adminq_report_link_speed {
+	__be64 link_speed_address; /* bus address of the buffer the speed is read back from */
+};
+
+static_assert(sizeof(struct gve_adminq_report_link_speed) == 8);
+
+struct stats {
+	__be32 stat_name;
+	__be32 queue_id;
+	__be64 value;
+};
+
+static_assert(sizeof(struct stats) == 16);
+
+struct gve_stats_report {
+	__be64 written_count;
+	struct stats stats[]; /* flexible array member; adds nothing to sizeof */
+};
+
+static_assert(sizeof(struct gve_stats_report) == 8);
+
+enum gve_stat_names {
+	// stats from gve
+	TX_WAKE_CNT			= 1,
+	TX_STOP_CNT			= 2,
+	TX_FRAMES_SENT			= 3,
+	TX_BYTES_SENT			= 4,
+	TX_LAST_COMPLETION_PROCESSED	= 5,
+	RX_NEXT_EXPECTED_SEQUENCE	= 6,
+	RX_BUFFERS_POSTED		= 7,
+	// stats from NIC
+	RX_QUEUE_DROP_CNT		= 65,
+	RX_NO_BUFFERS_POSTED		= 66,
+	RX_DROPS_PACKET_OVER_MRU	= 67,
+	RX_DROPS_INVALID_CHECKSUM	= 68,
+};
+
 union gve_adminq_command {
 	struct {
 		__be32 opcode;
@@ -187,6 +241,8 @@
 			struct gve_adminq_register_page_list reg_page_list;
 			struct gve_adminq_unregister_page_list unreg_page_list;
 			struct gve_adminq_set_driver_parameter set_driver_param;
+			struct gve_adminq_report_stats report_stats;
+			struct gve_adminq_report_link_speed report_link_speed;
 		};
 	};
 	u8 reserved[64];
@@ -197,8 +253,6 @@
 int gve_adminq_alloc(struct device *dev, struct gve_priv *priv);
 void gve_adminq_free(struct device *dev, struct gve_priv *priv);
 void gve_adminq_release(struct gve_priv *priv);
-int gve_adminq_execute_cmd(struct gve_priv *priv,
-			   union gve_adminq_command *cmd_orig);
 int gve_adminq_describe_device(struct gve_priv *priv);
 int gve_adminq_configure_device_resources(struct gve_priv *priv,
 					  dma_addr_t counter_array_bus_addr,
@@ -206,12 +260,15 @@
 					  dma_addr_t db_array_bus_addr,
 					  u32 num_ntfy_blks);
 int gve_adminq_deconfigure_device_resources(struct gve_priv *priv);
-int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_id);
-int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_id);
-int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_id);
-int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_id);
+int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 num_queues);
+int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 num_queues);
+int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues);
+int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues);
 int gve_adminq_register_page_list(struct gve_priv *priv,
 				  struct gve_queue_page_list *qpl);
 int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id);
 int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu);
+int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len,
+			    dma_addr_t stats_report_addr, u64 interval);
+int gve_adminq_report_link_speed(struct gve_priv *priv);
 #endif /* _GVE_ADMINQ_H */

diff --git a/drivers/net/ethernet/google/gve/gve_desc.h b/drivers/net/ethernet/google/gve/gve_desc.h
index 54779871..a7da364 100644
--- a/drivers/net/ethernet/google/gve/gve_desc.h
+++ b/drivers/net/ethernet/google/gve/gve_desc.h

@@ -16,9 +16,11 @@
  * Base addresses encoded in seg_addr are not assumed to be physical
  * addresses. The ring format assumes these come from some linear address
  * space. This could be physical memory, kernel virtual memory, user virtual
- * memory. gVNIC uses lists of registered pages. Each queue is assumed
- * to be associated with a single such linear address space to ensure a
- * consistent meaning for seg_addrs posted to its rings.
+ * memory.
+ * If raw dma addressing is not supported then gVNIC uses lists of registered
+ * pages. Each queue is assumed to be associated with a single such linear
+ * address space to ensure a consistent meaning for seg_addrs posted to its
+ * rings.
  */
 
 struct gve_tx_pkt_desc {
@@ -72,12 +74,14 @@
 } __packed;
 static_assert(sizeof(struct gve_rx_desc) == 64);
 
-/* As with the Tx ring format, the qpl_offset entries below are offsets into an
- * ordered list of registered pages.
+/* If the device supports raw dma addressing then the addr in data slot is
+ * the dma address of the buffer.
+ * If the device only supports registered segments then the addr is a byte
+ * offset into the registered segment (an ordered list of pages) where the
+ * buffer is.
  */
 struct gve_rx_data_slot {
-	/* byte offset into the rx registered segment of this slot */
-	__be64 qpl_offset;
+	__be64 addr;
 };
 
 /* GVE Recive Packet Descriptor Seq No */

diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
index d8fa816..ae76683 100644
--- a/drivers/net/ethernet/google/gve/gve_ethtool.c
+++ b/drivers/net/ethernet/google/gve/gve_ethtool.c

@@ -6,6 +6,7 @@
 
 #include <linux/rtnetlink.h>
 #include "gve.h"
+#include "gve_adminq.h"
 
 static void gve_get_drvinfo(struct net_device *netdev,
 			    struct ethtool_drvinfo *info)
@@ -32,43 +33,86 @@
 }
 
 static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = {
-	"rx_packets", "tx_packets", "rx_bytes", "tx_bytes",
-	"rx_dropped", "tx_dropped", "tx_timeouts",
+	"rx_packets", "rx_total_bytes", "rx_total_dropped_pkt",
+	"rx_skb_alloc_fail", "rx_buf_alloc_fail", "rx_desc_err_dropped_pkt",
+	"tx_packets", "tx_total_bytes", "tx_total_dropped_pkt", "tx_timeouts",
+	"interface_up_cnt", "interface_down_cnt", "reset_cnt",
+	"page_alloc_fail", "dma_mapping_error",
+};
+
+static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = {
+	"rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_bytes[%u]",
+	"rx_dropped_pkt[%u]", "rx_copybreak_pkt[%u]", "rx_copied_pkt[%u]",
+	"rx_queue_drop_cnt[%u]", "rx_no_buffers_posted[%u]",
+	"rx_drops_packet_over_mru[%u]", "rx_drops_invalid_checksum[%u]",
+};
+
+static const char gve_gstrings_tx_stats[][ETH_GSTRING_LEN] = {
+	"tx_posted_desc[%u]", "tx_completed_desc[%u]", "tx_bytes[%u]",
+	"tx_wake[%u]", "tx_stop[%u]", "tx_event_counter[%u]",
+};
+
+static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = {
+	"adminq_prod_cnt", "adminq_cmd_fail", "adminq_timeouts",
+	"adminq_describe_device_cnt", "adminq_cfg_device_resources_cnt",
+	"adminq_register_page_list_cnt", "adminq_unregister_page_list_cnt",
+	"adminq_create_tx_queue_cnt", "adminq_create_rx_queue_cnt",
+	"adminq_destroy_tx_queue_cnt", "adminq_destroy_rx_queue_cnt",
+	"adminq_dcfg_device_resources_cnt", "adminq_set_driver_parameter_cnt",
+	"adminq_report_stats_cnt",
+};
+
+static const char gve_gstrings_priv_flags[][ETH_GSTRING_LEN] = {
+	"report-stats",
 };
 
 #define GVE_MAIN_STATS_LEN  ARRAY_SIZE(gve_gstrings_main_stats)
-#define NUM_GVE_TX_CNTS	5
-#define NUM_GVE_RX_CNTS	2
+#define GVE_ADMINQ_STATS_LEN  ARRAY_SIZE(gve_gstrings_adminq_stats)
+#define NUM_GVE_TX_CNTS	ARRAY_SIZE(gve_gstrings_tx_stats)
+#define NUM_GVE_RX_CNTS	ARRAY_SIZE(gve_gstrings_rx_stats)
+#define GVE_PRIV_FLAGS_STR_LEN ARRAY_SIZE(gve_gstrings_priv_flags)
 
 static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 {
 	struct gve_priv *priv = netdev_priv(netdev);
 	char *s = (char *)data;
-	int i;
+	int i, j;
 
-	if (stringset != ETH_SS_STATS)
-		return;
+	switch (stringset) {
+	case ETH_SS_STATS:
+		memcpy(s, *gve_gstrings_main_stats,
+		       sizeof(gve_gstrings_main_stats));
+		s += sizeof(gve_gstrings_main_stats);
 
-	memcpy(s, *gve_gstrings_main_stats,
-	       sizeof(gve_gstrings_main_stats));
-	s += sizeof(gve_gstrings_main_stats);
-	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
-		snprintf(s, ETH_GSTRING_LEN, "rx_desc_cnt[%u]", i);
-		s += ETH_GSTRING_LEN;
-		snprintf(s, ETH_GSTRING_LEN, "rx_desc_fill_cnt[%u]", i);
-		s += ETH_GSTRING_LEN;
-	}
-	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
-		snprintf(s, ETH_GSTRING_LEN, "tx_req[%u]", i);
-		s += ETH_GSTRING_LEN;
-		snprintf(s, ETH_GSTRING_LEN, "tx_done[%u]", i);
-		s += ETH_GSTRING_LEN;
-		snprintf(s, ETH_GSTRING_LEN, "tx_wake[%u]", i);
-		s += ETH_GSTRING_LEN;
-		snprintf(s, ETH_GSTRING_LEN, "tx_stop[%u]", i);
-		s += ETH_GSTRING_LEN;
-		snprintf(s, ETH_GSTRING_LEN, "tx_event_counter[%u]", i);
-		s += ETH_GSTRING_LEN;
+		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+			for (j = 0; j < NUM_GVE_RX_CNTS; j++) {
+				snprintf(s, ETH_GSTRING_LEN,
+					 gve_gstrings_rx_stats[j], i);
+				s += ETH_GSTRING_LEN;
+			}
+		}
+
+		for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+			for (j = 0; j < NUM_GVE_TX_CNTS; j++) {
+				snprintf(s, ETH_GSTRING_LEN,
+					 gve_gstrings_tx_stats[j], i);
+				s += ETH_GSTRING_LEN;
+			}
+		}
+
+		memcpy(s, *gve_gstrings_adminq_stats,
+		       sizeof(gve_gstrings_adminq_stats));
+		s += sizeof(gve_gstrings_adminq_stats);
+		break;
+
+	case ETH_SS_PRIV_FLAGS:
+		memcpy(s, *gve_gstrings_priv_flags,
+		       sizeof(gve_gstrings_priv_flags));
+		s += sizeof(gve_gstrings_priv_flags);
+		break;
+
+	default:
+		break;
 	}
 }
 
@@ -78,9 +122,11 @@
 
 	switch (sset) {
 	case ETH_SS_STATS:
-		return GVE_MAIN_STATS_LEN +
+		return GVE_MAIN_STATS_LEN + GVE_ADMINQ_STATS_LEN +
 		       (priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS) +
 		       (priv->tx_cfg.num_queues * NUM_GVE_TX_CNTS);
+	case ETH_SS_PRIV_FLAGS:
+		return GVE_PRIV_FLAGS_STR_LEN;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -90,24 +136,57 @@
 gve_get_ethtool_stats(struct net_device *netdev,
 		      struct ethtool_stats *stats, u64 *data)
 {
+	u64 tmp_rx_pkts, tmp_rx_bytes, tmp_rx_skb_alloc_fail,
+		tmp_rx_buf_alloc_fail, tmp_rx_desc_err_dropped_pkt,
+		tmp_tx_pkts, tmp_tx_bytes;
+	u64 rx_pkts, rx_bytes, rx_skb_alloc_fail, rx_buf_alloc_fail,
+			rx_desc_err_dropped_pkt, tx_pkts,
+			tx_bytes;
 	struct gve_priv *priv = netdev_priv(netdev);
-	u64 rx_pkts, rx_bytes, tx_pkts, tx_bytes;
+	int *rx_qid_to_stats_idx;
+	int *tx_qid_to_stats_idx;
+	struct stats *report_stats = priv->stats_report->stats;
+	int stats_idx, base_stats_idx, max_stats_idx;
+	bool skip_nic_stats;
 	unsigned int start;
 	int ring;
-	int i;
+	int i, j;
 
 	ASSERT_RTNL();
 
-	for (rx_pkts = 0, rx_bytes = 0, ring = 0;
+	rx_qid_to_stats_idx = kmalloc_array(priv->rx_cfg.num_queues,
+					    sizeof(int), GFP_KERNEL);
+	if (!rx_qid_to_stats_idx) {
+		return;
+	}
+	tx_qid_to_stats_idx = kmalloc_array(priv->tx_cfg.num_queues,
+					    sizeof(int), GFP_KERNEL);
+	if (!tx_qid_to_stats_idx) {
+		kfree(rx_qid_to_stats_idx);
+		return;
+	}
+
+	for (rx_pkts = 0, rx_bytes = 0, rx_skb_alloc_fail = 0,
+	     rx_buf_alloc_fail = 0, rx_desc_err_dropped_pkt = 0, ring = 0;
 	     ring < priv->rx_cfg.num_queues; ring++) {
 		if (priv->rx) {
 			do {
+				struct gve_rx_ring *rx = &priv->rx[ring];
 				start =
 				  u64_stats_fetch_begin(&priv->rx[ring].statss);
-				rx_pkts += priv->rx[ring].rpackets;
-				rx_bytes += priv->rx[ring].rbytes;
+				tmp_rx_pkts = rx->rpackets;
+				tmp_rx_bytes = rx->rbytes;
+				tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail;
+				tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail;
+				tmp_rx_desc_err_dropped_pkt =
+					rx->rx_desc_err_dropped_pkt;
 			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
 						       start));
+			rx_pkts += tmp_rx_pkts;
+			rx_bytes += tmp_rx_bytes;
+			rx_skb_alloc_fail += tmp_rx_skb_alloc_fail;
+			rx_buf_alloc_fail += tmp_rx_buf_alloc_fail;
+			rx_desc_err_dropped_pkt += tmp_rx_desc_err_dropped_pkt;
 		}
 	}
 	for (tx_pkts = 0, tx_bytes = 0, ring = 0;
@@ -116,34 +195,111 @@
 			do {
 				start =
 				  u64_stats_fetch_begin(&priv->tx[ring].statss);
-				tx_pkts += priv->tx[ring].pkt_done;
-				tx_bytes += priv->tx[ring].bytes_done;
+				tmp_tx_pkts = priv->tx[ring].pkt_done;
+				tmp_tx_bytes = priv->tx[ring].bytes_done;
 			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
 						       start));
+			tx_pkts += tmp_tx_pkts;
+			tx_bytes += tmp_tx_bytes;
 		}
 	}
 
 	i = 0;
 	data[i++] = rx_pkts;
-	data[i++] = tx_pkts;
 	data[i++] = rx_bytes;
+	/* total rx dropped packets */
+	data[i++] = rx_skb_alloc_fail + rx_buf_alloc_fail +
+		    rx_desc_err_dropped_pkt;
+	data[i++] = rx_skb_alloc_fail;
+	data[i++] = rx_buf_alloc_fail;
+	data[i++] = rx_desc_err_dropped_pkt;
+	data[i++] = tx_pkts;
 	data[i++] = tx_bytes;
-	/* Skip rx_dropped and tx_dropped */
-	i += 2;
+	/* Skip tx_dropped */
+	i++;
 	data[i++] = priv->tx_timeo_cnt;
+	data[i++] = priv->interface_up_cnt;
+	data[i++] = priv->interface_down_cnt;
+	data[i++] = priv->reset_cnt;
+	data[i++] = priv->page_alloc_fail;
+	data[i++] = priv->dma_mapping_error;
 	i = GVE_MAIN_STATS_LEN;
 
+	/* For rx cross-reporting stats, start from nic rx stats in report */
+	base_stats_idx = GVE_TX_STATS_REPORT_NUM * priv->tx_cfg.num_queues +
+		GVE_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues;
+	max_stats_idx = NIC_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues +
+		base_stats_idx;
+	/* Preprocess the stats report for rx, map queue id to start index */
+	skip_nic_stats = false;
+	for (stats_idx = base_stats_idx; stats_idx < max_stats_idx;
+		stats_idx += NIC_RX_STATS_REPORT_NUM) {
+		u32 stat_name = be32_to_cpu(report_stats[stats_idx].stat_name);
+		u32 queue_id = be32_to_cpu(report_stats[stats_idx].queue_id);
+		if (stat_name == 0) {
+			/* no stats written by NIC yet */
+			skip_nic_stats = true;
+			break;
+		}
+		rx_qid_to_stats_idx[queue_id] = stats_idx;
+	}
 	/* walk RX rings */
 	if (priv->rx) {
 		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
 			struct gve_rx_ring *rx = &priv->rx[ring];
 
-			data[i++] = rx->cnt;
 			data[i++] = rx->fill_cnt;
+			data[i++] = rx->cnt;
+			do {
+				start =
+				  u64_stats_fetch_begin(&priv->rx[ring].statss);
+				tmp_rx_bytes = rx->rbytes;
+				tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail;
+				tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail;
+				tmp_rx_desc_err_dropped_pkt =
+					rx->rx_desc_err_dropped_pkt;
+			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
+						       start));
+			data[i++] = tmp_rx_bytes;
+			/* rx dropped packets */
+			data[i++] = tmp_rx_skb_alloc_fail +
+				tmp_rx_buf_alloc_fail +
+				tmp_rx_desc_err_dropped_pkt;
+			data[i++] = rx->rx_copybreak_pkt;
+			data[i++] = rx->rx_copied_pkt;
+			/* stats from NIC */
+			if (skip_nic_stats) {
+				/* skip NIC rx stats */
+				i += NIC_RX_STATS_REPORT_NUM;
+				continue;
+			}
+			for (j = 0; j < NIC_RX_STATS_REPORT_NUM; j++) {
+				u64 value = be64_to_cpu(report_stats[
+					rx_qid_to_stats_idx[ring] + j].value);
+				data[i++] = value;
+			}
 		}
-	} else {
+	}
+ else {
 		i += priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS;
 	}
+	/* For tx cross-reporting stats, start from nic tx stats in report */
+	base_stats_idx = max_stats_idx;
+	max_stats_idx = NIC_TX_STATS_REPORT_NUM * priv->tx_cfg.num_queues +
+		max_stats_idx;
+	/* Preprocess the stats report for tx, map queue id to start index */
+	skip_nic_stats = false;
+	for (stats_idx = base_stats_idx; stats_idx < max_stats_idx;
+		stats_idx += NIC_TX_STATS_REPORT_NUM) {
+		u32 stat_name = be32_to_cpu(report_stats[stats_idx].stat_name);
+		u32 queue_id = be32_to_cpu(report_stats[stats_idx].queue_id);
+		if (stat_name == 0) {
+			/* no stats written by NIC yet */
+			skip_nic_stats = true;
+			break;
+		}
+		tx_qid_to_stats_idx[queue_id] = stats_idx;
+	}
 	/* walk TX rings */
 	if (priv->tx) {
 		for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
@@ -151,14 +307,51 @@
 
 			data[i++] = tx->req;
 			data[i++] = tx->done;
+			do {
+				start =
+				  u64_stats_fetch_begin(&priv->tx[ring].statss);
+				tmp_tx_bytes = tx->bytes_done;
+			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
+						       start));
+			data[i++] = tmp_tx_bytes;
 			data[i++] = tx->wake_queue;
 			data[i++] = tx->stop_queue;
 			data[i++] = be32_to_cpu(gve_tx_load_event_counter(priv,
 									  tx));
+			/* stats from NIC */
+			if (skip_nic_stats) {
+				/* skip NIC tx stats */
+				i += NIC_TX_STATS_REPORT_NUM;
+				continue;
+			}
+			for (j = 0; j < NIC_TX_STATS_REPORT_NUM; j++) {
+				u64 value = be64_to_cpu(report_stats[
+					tx_qid_to_stats_idx[ring] + j].value);
+				data[i++] = value;
+			}
 		}
 	} else {
 		i += priv->tx_cfg.num_queues * NUM_GVE_TX_CNTS;
 	}
+
+	kfree(rx_qid_to_stats_idx);
+	kfree(tx_qid_to_stats_idx);
+
+	/* AQ Stats */
+	data[i++] = priv->adminq_prod_cnt;
+	data[i++] = priv->adminq_cmd_fail;
+	data[i++] = priv->adminq_timeouts;
+	data[i++] = priv->adminq_describe_device_cnt;
+	data[i++] = priv->adminq_cfg_device_resources_cnt;
+	data[i++] = priv->adminq_register_page_list_cnt;
+	data[i++] = priv->adminq_unregister_page_list_cnt;
+	data[i++] = priv->adminq_create_tx_queue_cnt;
+	data[i++] = priv->adminq_create_rx_queue_cnt;
+	data[i++] = priv->adminq_destroy_tx_queue_cnt;
+	data[i++] = priv->adminq_destroy_rx_queue_cnt;
+	data[i++] = priv->adminq_dcfg_device_resources_cnt;
+	data[i++] = priv->adminq_set_driver_parameter_cnt;
+	data[i++] = priv->adminq_report_stats_cnt;
 }
 
 static void gve_get_channels(struct net_device *netdev,
@@ -230,6 +423,99 @@
 	return -EOPNOTSUPP;
 }
 
+/* ethtool get_tunable handler.
+ *
+ * Only ETHTOOL_RX_COPYBREAK is recognised: the current priv->rx_copybreak
+ * is stored through @value as a u32 and 0 is returned. Every other tunable
+ * id yields -EINVAL.
+ */
+static int gve_get_tunable(struct net_device *netdev,
+			   const struct ethtool_tunable *etuna, void *value)
+{
+	struct gve_priv *priv = netdev_priv(netdev);
+	u32 *out = value;
+
+	if (etuna->id != ETHTOOL_RX_COPYBREAK)
+		return -EINVAL;
+
+	*out = priv->rx_copybreak;
+	return 0;
+}
+
+/* ethtool set_tunable handler.
+ *
+ * Only ETHTOOL_RX_COPYBREAK is recognised. The requested length is read
+ * from @value as a u32; a length larger than the netdev's current mtu is
+ * refused with -EINVAL, otherwise it is stored in priv->rx_copybreak and
+ * 0 is returned. Every other tunable id yields -EINVAL.
+ */
+static int gve_set_tunable(struct net_device *netdev,
+			   const struct ethtool_tunable *etuna,
+			   const void *value)
+{
+	struct gve_priv *priv = netdev_priv(netdev);
+	u32 len;
+
+	switch (etuna->id) {
+	case ETHTOOL_RX_COPYBREAK:
+		/* @value is const: read it through a const-qualified pointer */
+		len = *(const u32 *)value;
+		if (len > priv->dev->mtu)
+			return -EINVAL;
+		priv->rx_copybreak = len;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+/* ethtool get_priv_flags handler.
+ *
+ * Returns the bits of priv->ethtool_flags whose positions are below
+ * GVE_PRIV_FLAGS_STR_LEN (the number of private-flag strings); any
+ * higher bit is left out of the result.
+ */
+static u32 gve_get_priv_flags(struct net_device *netdev)
+{
+	struct gve_priv *priv = netdev_priv(netdev);
+	u32 bit, flags = 0;
+
+	for (bit = 0; bit < GVE_PRIV_FLAGS_STR_LEN; bit++)
+		flags |= priv->ethtool_flags & BIT(bit);
+
+	return flags;
+}
+
+/* ethtool set_priv_flags handler.
+ *
+ * For every private flag index below GVE_PRIV_FLAGS_STR_LEN the matching
+ * bit of @flags is copied into priv->ethtool_flags. The flag whose string
+ * is "report-stats" additionally triggers a stats refresh when it is set,
+ * and a wipe of the driver-written stats entries when it goes from set to
+ * clear. Always returns 0.
+ */
+static int gve_set_priv_flags(struct net_device *netdev, u32 flags)
+{
+	struct gve_priv *priv = netdev_priv(netdev);
+	u64 ori_flags, new_flags;
+	u32 i;
+
+	/* Snapshot the flags once; ori_flags is what "previously set" means */
+	ori_flags = READ_ONCE(priv->ethtool_flags);
+	new_flags = ori_flags;
+
+	for (i = 0; i < GVE_PRIV_FLAGS_STR_LEN; i++) {
+		if (flags & BIT(i))
+			new_flags |= BIT(i);
+		else
+			new_flags &= ~(BIT(i));
+		/* Stored before the report-stats handling below is run */
+		priv->ethtool_flags = new_flags;
+		/* set report-stats */
+		if (strcmp(gve_gstrings_priv_flags[i], "report-stats") == 0) {
+			/* update the stats when user turns report-stats on */
+			if (flags & BIT(i))
+				gve_handle_report_stats(priv);
+			/* zero off gve stats when report-stats turned off */
+			if (!(flags & BIT(i)) && (ori_flags & BIT(i))) {
+				/* Only the leading GVE_{TX,RX}_STATS_REPORT_NUM
+				 * entries per queue are cleared.
+				 */
+				int tx_stats_num = GVE_TX_STATS_REPORT_NUM *
+					priv->tx_cfg.num_queues;
+				int rx_stats_num = GVE_RX_STATS_REPORT_NUM *
+					priv->rx_cfg.num_queues;
+				memset(priv->stats_report->stats, 0,
+				       (tx_stats_num + rx_stats_num) *
+				       sizeof(struct stats));
+			}
+		}
+	}
+
+	return 0;
+}
+
+/* ethtool get_link_ksettings handler.
+ *
+ * Calls gve_adminq_report_link_speed() and then copies priv->link_speed
+ * into cmd->base.speed. The speed field is written whether or not that
+ * call succeeded; the call's result is returned.
+ */
+static int gve_get_link_ksettings(struct net_device *netdev,
+				  struct ethtool_link_ksettings *cmd)
+{
+	struct gve_priv *priv = netdev_priv(netdev);
+	int err;
+
+	err = gve_adminq_report_link_speed(priv);
+	cmd->base.speed = priv->link_speed;
+
+	return err;
+}
+
 const struct ethtool_ops gve_ethtool_ops = {
 	.get_drvinfo = gve_get_drvinfo,
 	.get_strings = gve_get_strings,
@@ -242,4 +528,9 @@
 	.get_link = ethtool_op_get_link,
 	.get_ringparam = gve_get_ringparam,
 	.reset = gve_user_reset,
+	.get_tunable = gve_get_tunable,
+	.set_tunable = gve_set_tunable,
+	.get_priv_flags = gve_get_priv_flags,
+	.set_priv_flags = gve_set_priv_flags,
+	.get_link_ksettings = gve_get_link_ksettings
 };

diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index f8dfa75..f956351 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c

@@ -20,7 +20,7 @@
 #define GVE_DEFAULT_RX_COPYBREAK	(256)
 
 #define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)
-#define GVE_VERSION		"1.0.0"
+#define GVE_VERSION		"1.1.0"
 #define GVE_VERSION_PREFIX	"GVE-"
 
 const char gve_version_str[] = GVE_VERSION;
@@ -78,6 +78,60 @@
 	priv->counter_array = NULL;
 }
 
+/* Call gve_set_do_report_stats() and queue priv->service_task on
+ * priv->gve_wq. Nothing is done while a probe or a reset is in progress.
+ */
+void gve_service_task_schedule(struct gve_priv *priv)
+{
+	if (gve_get_probe_in_progress(priv) ||
+	    gve_get_reset_in_progress(priv))
+		return;
+
+	gve_set_do_report_stats(priv);
+	queue_work(priv->gve_wq, &priv->service_task);
+}
+
+/* Callback for priv->service_timer: re-arm the timer
+ * priv->service_timer_period milliseconds ahead (rounded with
+ * round_jiffies()) and then call gve_service_task_schedule().
+ */
+static void gve_service_timer(struct timer_list *t)
+{
+	struct gve_priv *priv = from_timer(priv, t, service_timer);
+	unsigned long period = msecs_to_jiffies(priv->service_timer_period);
+
+	mod_timer(&priv->service_timer, round_jiffies(jiffies + period));
+	gve_service_task_schedule(priv);
+}
+
+/* Allocate the DMA-coherent stats report and start the service timer.
+ *
+ * The report is sized as one struct gve_stats_report followed by
+ * (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) struct stats entries
+ * per tx queue and (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM)
+ * entries per rx queue. The length, CPU address and bus address are
+ * recorded in priv->stats_report_len, priv->stats_report and
+ * priv->stats_report_bus.
+ *
+ * Returns 0 on success, or -ENOMEM if the allocation fails (the timer is
+ * not touched in that case).
+ */
+static int gve_alloc_stats_report(struct gve_priv *priv)
+{
+	struct device *dev = &priv->pdev->dev;
+	int n_tx_stats, n_rx_stats;
+
+	n_tx_stats = priv->tx_cfg.num_queues *
+		     (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM);
+	n_rx_stats = priv->rx_cfg.num_queues *
+		     (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM);
+	priv->stats_report_len = sizeof(struct gve_stats_report) +
+				 sizeof(struct stats) *
+				 (n_tx_stats + n_rx_stats);
+
+	priv->stats_report = dma_alloc_coherent(dev, priv->stats_report_len,
+						&priv->stats_report_bus,
+						GFP_KERNEL);
+	if (!priv->stats_report)
+		return -ENOMEM;
+
+	/* Arm the periodic service timer with GVE_SERVICE_TIMER_PERIOD */
+	priv->service_timer_period = GVE_SERVICE_TIMER_PERIOD;
+	timer_setup(&priv->service_timer, gve_service_timer, 0);
+	mod_timer(&priv->service_timer,
+		  round_jiffies(jiffies +
+				msecs_to_jiffies(priv->service_timer_period)));
+	return 0;
+}
+
+/* Stop the service timer and release the stats report allocated by
+ * gve_alloc_stats_report().
+ *
+ * Does nothing when priv->stats_report is NULL, so the function is safe to
+ * call when the allocation never succeeded (the timer was then never set
+ * up) or when the report has already been freed.
+ */
+static void gve_free_stats_report(struct gve_priv *priv)
+{
+	if (!priv->stats_report)
+		return;
+
+	del_timer_sync(&priv->service_timer);
+	dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
+			  priv->stats_report, priv->stats_report_bus);
+	priv->stats_report = NULL;
+}
+
 static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
 {
 	struct gve_priv *priv = arg;
@@ -187,15 +241,24 @@
 		dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
 		goto abort_with_msix_enabled;
 	}
-	priv->ntfy_blocks =
+
+	priv->irq_db_indices =
 		dma_alloc_coherent(&priv->pdev->dev,
 				   priv->num_ntfy_blks *
-				   sizeof(*priv->ntfy_blocks),
-				   &priv->ntfy_block_bus, GFP_KERNEL);
-	if (!priv->ntfy_blocks) {
+				   sizeof(*priv->irq_db_indices),
+				   &priv->irq_db_indices_bus, GFP_KERNEL);
+	if (!priv->irq_db_indices) {
 		err = -ENOMEM;
 		goto abort_with_mgmt_vector;
 	}
+
+	priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
+				     sizeof(*priv->ntfy_blocks), GFP_KERNEL);
+	if (!priv->ntfy_blocks) {
+		err = -ENOMEM;
+		goto abort_with_irq_db_indices;
+	}
+
 	/* Setup the other blocks - the first n-1 vectors */
 	for (i = 0; i < priv->num_ntfy_blks; i++) {
 		struct gve_notify_block *block = &priv->ntfy_blocks[i];
@@ -213,6 +276,7 @@
 		}
 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
 				      get_cpu_mask(i % active_cpus));
+		block->irq_db_index = &priv->irq_db_indices[i].index;
 	}
 	return 0;
 abort_with_some_ntfy_blocks:
@@ -224,10 +288,13 @@
 				      NULL);
 		free_irq(priv->msix_vectors[msix_idx].vector, block);
 	}
-	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
-			  sizeof(*priv->ntfy_blocks),
-			  priv->ntfy_blocks, priv->ntfy_block_bus);
+	kvfree(priv->ntfy_blocks);
 	priv->ntfy_blocks = NULL;
+abort_with_irq_db_indices:
+	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
+			  sizeof(*priv->irq_db_indices),
+			  priv->irq_db_indices, priv->irq_db_indices_bus);
+	priv->irq_db_indices = NULL;
 abort_with_mgmt_vector:
 	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
 abort_with_msix_enabled:
@@ -254,10 +321,12 @@
 		}
 		free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
 	}
-	dma_free_coherent(&priv->pdev->dev,
-			  priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks),
-			  priv->ntfy_blocks, priv->ntfy_block_bus);
+	kvfree(priv->ntfy_blocks);
 	priv->ntfy_blocks = NULL;
+	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
+			  sizeof(*priv->irq_db_indices),
+			  priv->irq_db_indices, priv->irq_db_indices_bus);
+	priv->irq_db_indices = NULL;
 	pci_disable_msix(priv->pdev);
 	kvfree(priv->msix_vectors);
 	priv->msix_vectors = NULL;
@@ -273,19 +342,30 @@
 	err = gve_alloc_notify_blocks(priv);
 	if (err)
 		goto abort_with_counter;
+	err = gve_alloc_stats_report(priv);
+	if (err)
+		goto abort_with_ntfy_blocks;
 	err = gve_adminq_configure_device_resources(priv,
 						    priv->counter_array_bus,
 						    priv->num_event_counters,
-						    priv->ntfy_block_bus,
+						    priv->irq_db_indices_bus,
 						    priv->num_ntfy_blks);
 	if (unlikely(err)) {
 		dev_err(&priv->pdev->dev,
 			"could not setup device_resources: err=%d\n", err);
 		err = -ENXIO;
-		goto abort_with_ntfy_blocks;
+		goto abort_with_stats_report;
 	}
+	err = gve_adminq_report_stats(priv, priv->stats_report_len,
+				      priv->stats_report_bus,
+				      GVE_SERVICE_TIMER_PERIOD);
+	if (err)
+		dev_err(&priv->pdev->dev,
+			"Failed to report stats: err=%d\n", err);
 	gve_set_device_resources_ok(priv);
 	return 0;
+abort_with_stats_report:
+	gve_free_stats_report(priv);
 abort_with_ntfy_blocks:
 	gve_free_notify_blocks(priv);
 abort_with_counter:
@@ -301,6 +381,14 @@
 
 	/* Tell device its resources are being freed */
 	if (gve_get_device_resources_ok(priv)) {
+		/* detach the stats report */
+		err = gve_adminq_report_stats(priv, 0, 0x0,
+			GVE_SERVICE_TIMER_PERIOD);
+		if (err) {
+			dev_err(&priv->pdev->dev,
+				"Failed to detach stats report: err=%d\n", err);
+			gve_trigger_reset(priv);
+		}
 		err = gve_adminq_deconfigure_device_resources(priv);
 		if (err) {
 			dev_err(&priv->pdev->dev,
@@ -311,6 +399,7 @@
 	}
 	gve_free_counter_array(priv);
 	gve_free_notify_blocks(priv);
+	gve_free_stats_report(priv);
 	gve_clear_device_resources_ok(priv);
 }
 
@@ -374,35 +463,37 @@
 	int err;
 	int i;
 
-	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
-		err = gve_adminq_create_tx_queue(priv, i);
-		if (err) {
-			netif_err(priv, drv, priv->dev, "failed to create tx queue %d\n",
-				  i);
-			/* This failure will trigger a reset - no need to clean
-			 * up
-			 */
-			return err;
-		}
-		netif_dbg(priv, drv, priv->dev, "created tx queue %d\n", i);
-	}
-	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
-		err = gve_adminq_create_rx_queue(priv, i);
-		if (err) {
-			netif_err(priv, drv, priv->dev, "failed to create rx queue %d\n",
-				  i);
-			/* This failure will trigger a reset - no need to clean
-			 * up
-			 */
-			return err;
-		}
-		/* Rx data ring has been prefilled with packet buffers at
-		 * queue allocation time.
-		 * Write the doorbell to provide descriptor slots and packet
-		 * buffers to the NIC.
+	err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues);
+	if (err) {
+		netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
+			  priv->tx_cfg.num_queues);
+		/* This failure will trigger a reset - no need to clean
+		 * up
 		 */
+		return err;
+	}
+	netif_dbg(priv, drv, priv->dev, "created %d tx queues \n",
+		  priv->tx_cfg.num_queues);
+
+	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
+	if (err) {
+		netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
+			  priv->rx_cfg.num_queues);
+		/* This failure will trigger a reset - no need to clean
+		 * up
+		 */
+		return err;
+	}
+	netif_dbg(priv, drv, priv->dev, "created %d rx queues \n",
+		  priv->rx_cfg.num_queues);
+
+	/* Rx data ring has been prefilled with packet buffers at queue
+	 * allocation time.
+	 * Write the doorbell to provide descriptor slots and packet buffers
+	 * to the NIC.
+	*/
+	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
 		gve_rx_write_doorbell(priv, &priv->rx[i]);
-		netif_dbg(priv, drv, priv->dev, "created rx queue %d\n", i);
 	}
 
 	return 0;
@@ -461,34 +552,23 @@
 static int gve_destroy_rings(struct gve_priv *priv)
 {
 	int err;
-	int i;
 
-	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
-		err = gve_adminq_destroy_tx_queue(priv, i);
-		if (err) {
-			netif_err(priv, drv, priv->dev,
-				  "failed to destroy tx queue %d\n",
-				  i);
-			/* This failure will trigger a reset - no need to clean
-			 * up
-			 */
-			return err;
-		}
-		netif_dbg(priv, drv, priv->dev, "destroyed tx queue %d\n", i);
+	err = gve_adminq_destroy_tx_queues(priv, priv->tx_cfg.num_queues);
+	if (err) {
+		netif_err(priv, drv, priv->dev,
+			  "failed to destroy tx queues\n");
+		/* This failure will trigger a reset - no need to clean up */
+		return err;
 	}
-	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
-		err = gve_adminq_destroy_rx_queue(priv, i);
-		if (err) {
-			netif_err(priv, drv, priv->dev,
-				  "failed to destroy rx queue %d\n",
-				  i);
-			/* This failure will trigger a reset - no need to clean
-			 * up
-			 */
-			return err;
-		}
-		netif_dbg(priv, drv, priv->dev, "destroyed rx queue %d\n", i);
+	netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
+	err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
+	if (err) {
+		netif_err(priv, drv, priv->dev,
+			  "failed to destroy rx queues\n");
+		/* This failure will trigger a reset - no need to clean up */
+		return err;
 	}
+	netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
 	return 0;
 }
 
@@ -517,15 +597,25 @@
 	}
 }
 
-int gve_alloc_page(struct device *dev, struct page **page, dma_addr_t *dma,
-		   enum dma_data_direction dir)
+int gve_alloc_page(struct gve_priv *priv, struct device *dev,
+		   struct page **page, dma_addr_t *dma,
+		   enum dma_data_direction dir, gfp_t gfp_flags)
 {
-	*page = alloc_page(GFP_KERNEL);
-	if (!*page)
+	if (priv->dma_mask == 24)
+		gfp_flags |= GFP_DMA;
+	else if (priv->dma_mask == 32)
+		gfp_flags |= GFP_DMA32;
+
+  *page = alloc_page(gfp_flags);
+	if (!*page) {
+		priv->page_alloc_fail++;
 		return -ENOMEM;
+	}
 	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
 	if (dma_mapping_error(dev, *dma)) {
+		priv->dma_mapping_error++;
 		put_page(*page);
+		*page = NULL;
 		return -ENOMEM;
 	}
 	return 0;
@@ -559,9 +649,9 @@
 		return -ENOMEM;
 
 	for (i = 0; i < pages; i++) {
-		err = gve_alloc_page(&priv->pdev->dev, &qpl->pages[i],
+		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
 				     &qpl->page_buses[i],
-				     gve_qpl_dma_dir(priv, id));
+				     gve_qpl_dma_dir(priv, id), GFP_KERNEL);
 		/* caller handles clean up */
 		if (err)
 			return -ENOMEM;
@@ -608,6 +698,10 @@
 	int i, j;
 	int err;
 
+	/* Raw addressing means no QPLs */
+	if (priv->raw_addressing)
+		return 0;
+
 	priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL);
 	if (!priv->qpls)
 		return -ENOMEM;
@@ -620,7 +714,7 @@
 	}
 	for (; i < num_qpls; i++) {
 		err = gve_alloc_queue_page_list(priv, i,
-						priv->rx_pages_per_qpl);
+						priv->rx_data_slot_cnt);
 		if (err)
 			goto free_qpls;
 	}
@@ -648,6 +742,10 @@
 	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
 	int i;
 
+	/* Raw addressing means no QPLs */
+	if (priv->raw_addressing)
+		return;
+
 	kvfree(priv->qpl_cfg.qpl_id_map);
 
 	for (i = 0; i < num_qpls; i++)
@@ -699,7 +797,8 @@
 	gve_set_device_rings_ok(priv);
 
 	gve_turnup(priv);
-	netif_carrier_on(dev);
+	queue_work(priv->gve_wq, &priv->service_task);
+	priv->interface_up_cnt++;
 	return 0;
 
 free_rings:
@@ -741,6 +840,7 @@
 
 	gve_free_rings(priv);
 	gve_free_qpls(priv);
+	priv->interface_down_cnt++;
 	return 0;
 
 err:
@@ -820,6 +920,7 @@
 	netif_tx_disable(priv->dev);
 
 	gve_clear_napi_enabled(priv);
+	gve_clear_report_stats(priv);
 }
 
 static void gve_turnup(struct gve_priv *priv)
@@ -870,6 +971,10 @@
 		dev_info(&priv->pdev->dev, "Device requested reset.\n");
 		gve_set_do_reset(priv);
 	}
+	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
+		dev_info(&priv->pdev->dev, "Device report stats on.\n");
+		gve_set_do_report_stats(priv);
+	}
 }
 
 static void gve_handle_reset(struct gve_priv *priv)
@@ -888,16 +993,100 @@
 	}
 }
 
-/* Handle NIC status register changes and reset requests */
+/* Refresh the driver-written section of the stats report.
+ *
+ * Returns immediately unless gve_get_report_stats() is true. Otherwise
+ * stats_report->written_count is incremented and, starting at index 0 of
+ * stats_report->stats, five entries are written per tx queue followed by
+ * two entries per rx queue. Each entry carries the stat name, the value
+ * and the queue id, all converted to big endian.
+ */
+void gve_handle_report_stats(struct gve_priv *priv)
+{
+	struct stats *stats = priv->stats_report->stats;
+	int idx, stats_idx = 0;
+	unsigned int start = 0;
+	/* 64-bit so that bytes_done is not truncated before cpu_to_be64() */
+	u64 tx_bytes;
+
+	if (!gve_get_report_stats(priv))
+		return;
+
+	be64_add_cpu(&priv->stats_report->written_count, 1);
+	/* tx stats */
+	if (priv->tx) {
+		for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
+			/* bytes_done is sampled under the ring's u64_stats
+			 * sequence, retrying if an update raced with us
+			 */
+			do {
+				start = u64_stats_fetch_begin(&priv->tx[idx].statss);
+				tx_bytes = priv->tx[idx].bytes_done;
+			} while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
+			stats[stats_idx++] = (struct stats) {
+				.stat_name = cpu_to_be32(TX_WAKE_CNT),
+				.value = cpu_to_be64(priv->tx[idx].wake_queue),
+				.queue_id = cpu_to_be32(idx),
+			};
+			stats[stats_idx++] = (struct stats) {
+				.stat_name = cpu_to_be32(TX_STOP_CNT),
+				.value = cpu_to_be64(priv->tx[idx].stop_queue),
+				.queue_id = cpu_to_be32(idx),
+			};
+			stats[stats_idx++] = (struct stats) {
+				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
+				.value = cpu_to_be64(priv->tx[idx].req),
+				.queue_id = cpu_to_be32(idx),
+			};
+			stats[stats_idx++] = (struct stats) {
+				.stat_name = cpu_to_be32(TX_BYTES_SENT),
+				.value = cpu_to_be64(tx_bytes),
+				.queue_id = cpu_to_be32(idx),
+			};
+			stats[stats_idx++] = (struct stats) {
+				.stat_name = cpu_to_be32(
+					TX_LAST_COMPLETION_PROCESSED),
+				.value = cpu_to_be64(priv->tx[idx].done),
+				.queue_id = cpu_to_be32(idx),
+			};
+		}
+	}
+	/* rx stats */
+	if (priv->rx) {
+		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
+			stats[stats_idx++] = (struct stats) {
+				.stat_name = cpu_to_be32(
+					RX_NEXT_EXPECTED_SEQUENCE),
+				.value = cpu_to_be64(priv->rx[idx].desc.seqno),
+				.queue_id = cpu_to_be32(idx),
+			};
+			/* Report this queue's own fill count, not queue 0's */
+			stats[stats_idx++] = (struct stats) {
+				.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
+				.value = cpu_to_be64(priv->rx[idx].fill_cnt),
+				.queue_id = cpu_to_be32(idx),
+			};
+		}
+	}
+}
+
+/* Bring the netdev carrier state in line with @link_status.
+ *
+ * Nothing happens when gve_get_napi_enabled() is false or when
+ * netif_carrier_ok() already equals @link_status. Turning the carrier
+ * off is logged with dev_info().
+ */
+void gve_handle_link_status(struct gve_priv *priv, bool link_status)
+{
+	if (!gve_get_napi_enabled(priv) ||
+	    link_status == netif_carrier_ok(priv->dev))
+		return;
+
+	if (!link_status) {
+		dev_info(&priv->pdev->dev, "Device link is down.\n");
+		netif_carrier_off(priv->dev);
+		return;
+	}
+
+	netif_carrier_on(priv->dev);
+}
+
+/* Handle NIC status register changes, reset requests and report stats */
 static void gve_service_task(struct work_struct *work)
 {
 	struct gve_priv *priv = container_of(work, struct gve_priv,
 					     service_task);
+	u32 status = ioread32be(&priv->reg_bar0->device_status);
 
-	gve_handle_status(priv,
-			  ioread32be(&priv->reg_bar0->device_status));
+	gve_handle_status(priv, status);
 
 	gve_handle_reset(priv);
+	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
+	if (gve_get_do_report_stats(priv)) {
+		gve_handle_report_stats(priv);
+		gve_clear_do_report_stats(priv);
+	}
 }
 
 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
@@ -916,6 +1105,7 @@
 	if (skip_describe_device)
 		goto setup_device;
 
+	priv->raw_addressing = false;
 	/* Get the initial information we need from the device */
 	err = gve_adminq_describe_device(priv);
 	if (err) {
@@ -927,7 +1117,7 @@
 		priv->dev->max_mtu = PAGE_SIZE;
 		err = gve_adminq_set_mtu(priv, priv->dev->mtu);
 		if (err) {
-			netif_err(priv, drv, priv->dev, "Could not set mtu");
+		        dev_err(&priv->pdev->dev, "Could not set mtu");
 			goto err;
 		}
 	}
@@ -967,10 +1157,10 @@
 						priv->rx_cfg.num_queues);
 	}
 
-	netif_info(priv, drv, priv->dev, "TX queues %d, RX queues %d\n",
-		   priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
-	netif_info(priv, drv, priv->dev, "Max TX queues %d, Max RX queues %d\n",
-		   priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
+	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
+		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
+	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
+		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
 
 setup_device:
 	err = gve_setup_device_resources(priv);
@@ -1050,6 +1240,9 @@
 	/* Set it all back up */
 	err = gve_reset_recovery(priv, was_up);
 	gve_clear_reset_in_progress(priv);
+	priv->reset_cnt++;
+	priv->interface_up_cnt = 0;
+	priv->interface_down_cnt = 0;
 	return err;
 }
 
@@ -1077,6 +1270,7 @@
 	__be32 __iomem *db_bar;
 	struct gve_registers __iomem *reg_bar;
 	struct gve_priv *priv;
+	u8 dma_mask;
 	int err;
 
 	err = pci_enable_device(pdev);
@@ -1089,19 +1283,6 @@
 
 	pci_set_master(pdev);
 
-	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
-	if (err) {
-		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
-		goto abort_with_pci_region;
-	}
-
-	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-	if (err) {
-		dev_err(&pdev->dev,
-			"Failed to set consistent dma mask: err=%d\n", err);
-		goto abort_with_pci_region;
-	}
-
 	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
 	if (!reg_bar) {
 		dev_err(&pdev->dev, "Failed to map pci bar!\n");
@@ -1116,10 +1297,28 @@
 		goto abort_with_reg_bar;
 	}
 
+	dma_mask = readb(&reg_bar->dma_mask);
+	// Default to 64 if the register isn't set
+	if (!dma_mask)
+		dma_mask = 64;
 	gve_write_version(&reg_bar->driver_version);
 	/* Get max queues to alloc etherdev */
 	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
 	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
+
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_mask));
+	if (err) {
+		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
+		goto abort_with_reg_bar;
+	}
+
+	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_mask));
+	if (err) {
+		dev_err(&pdev->dev,
+			"Failed to set consistent dma mask: err=%d\n", err);
+		goto abort_with_reg_bar;
+	}
+
 	/* Alloc and setup the netdev and priv */
 	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
 	if (!dev) {
@@ -1127,7 +1326,9 @@
 		goto abort_with_db_bar;
 	}
 	SET_NETDEV_DEV(dev, &pdev->dev);
+
 	pci_set_drvdata(pdev, dev);
+
 	dev->ethtool_ops = &gve_ethtool_ops;
 	dev->netdev_ops = &gve_netdev_ops;
 	/* advertise features */
@@ -1152,8 +1353,11 @@
 	priv->db_bar2 = db_bar;
 	priv->service_task_flags = 0x0;
 	priv->state_flags = 0x0;
+	priv->ethtool_flags = 0x0;
+	priv->dma_mask = dma_mask;
 
 	gve_set_probe_in_progress(priv);
+
 	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
 	if (!priv->gve_wq) {
 		dev_err(&pdev->dev, "Could not allocate workqueue");
@@ -1175,6 +1379,7 @@
 	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
 	gve_clear_probe_in_progress(priv);
 	queue_work(priv->gve_wq, &priv->service_task);
+
 	return 0;
 
 abort_with_gve_init:

diff --git a/drivers/net/ethernet/google/gve/gve_register.h b/drivers/net/ethernet/google/gve/gve_register.h
index 84ab889..776c291 100644
--- a/drivers/net/ethernet/google/gve/gve_register.h
+++ b/drivers/net/ethernet/google/gve/gve_register.h

@@ -16,12 +16,14 @@
 	__be32	adminq_pfn;
 	__be32	adminq_doorbell;
 	__be32	adminq_event_counter;
-	u8	reserved[3];
+	u8	reserved[2];
+	u8	dma_mask;
 	u8	driver_version;
 };
 
 enum gve_device_status_flags {
 	GVE_DEVICE_STATUS_RESET_MASK		= BIT(1),
 	GVE_DEVICE_STATUS_LINK_STATUS_MASK	= BIT(2),
+	GVE_DEVICE_STATUS_REPORT_STATS_MASK	= BIT(3),
 };
 #endif /* _GVE_REGISTER_H_ */

diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index 9f52e72..a9bbe0e 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c

@@ -16,12 +16,41 @@
 	block->rx = NULL;
 }
 
+/* Release one rx buffer page.
+ *
+ * The DMA address handed to gve_free_page() is the data slot's address
+ * (converted from big endian) minus page_info->page_offset. Before the
+ * page is freed, pagecnt_bias - 1 references are subtracted from it.
+ */
+static void gve_rx_free_buffer(struct device *dev,
+			       struct gve_rx_slot_page_info *page_info,
+			       struct gve_rx_data_slot *data_slot)
+{
+	dma_addr_t dma;
+
+	dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) -
+			   page_info->page_offset);
+	page_ref_sub(page_info->page, page_info->pagecnt_bias - 1);
+	gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
+}
+
+/* Undo the rx page setup for all rx->mask + 1 slots of @rx.
+ *
+ * With raw addressing every slot is released through gve_rx_free_buffer().
+ * Otherwise pagecnt_bias - 1 references are subtracted from each slot's
+ * page and the ring's QPL is unassigned. In both cases the page_info
+ * array is freed and the pointer cleared.
+ */
+static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
+{
+	struct device *dev = &priv->pdev->dev;
+	u32 slots = rx->mask + 1;
+	u32 slot;
+
+	if (!rx->data.raw_addressing) {
+		for (slot = 0; slot < slots; slot++)
+			page_ref_sub(rx->data.page_info[slot].page,
+				     rx->data.page_info[slot].pagecnt_bias - 1);
+		gve_unassign_qpl(priv, rx->data.qpl->id);
+		rx->data.qpl = NULL;
+	} else {
+		for (slot = 0; slot < slots; slot++)
+			gve_rx_free_buffer(dev, &rx->data.page_info[slot],
+					   &rx->data.data_ring[slot]);
+	}
+
+	kvfree(rx->data.page_info);
+	rx->data.page_info = NULL;
+}
+
 static void gve_rx_free_ring(struct gve_priv *priv, int idx)
 {
 	struct gve_rx_ring *rx = &priv->rx[idx];
 	struct device *dev = &priv->pdev->dev;
 	size_t bytes;
-	u32 slots;
+	u32 slots = rx->mask + 1;
 
 	gve_rx_remove_from_block(priv, idx);
 
@@ -33,11 +62,8 @@
 			  rx->q_resources, rx->q_resources_bus);
 	rx->q_resources = NULL;
 
-	gve_unassign_qpl(priv, rx->data.qpl->id);
-	rx->data.qpl = NULL;
-	kvfree(rx->data.page_info);
+	gve_rx_unfill_pages(priv, rx);
 
-	slots = rx->mask + 1;
 	bytes = sizeof(*rx->data.data_ring) * slots;
 	dma_free_coherent(dev, bytes, rx->data.data_ring,
 			  rx->data.data_bus);
@@ -52,13 +78,17 @@
 	page_info->page = page;
 	page_info->page_offset = 0;
 	page_info->page_address = page_address(page);
-	slot->qpl_offset = cpu_to_be64(addr);
+	slot->addr = cpu_to_be64(addr);
+	/* The page already has 1 ref */
+	page_ref_add(page, INT_MAX - 1);
+	page_info->pagecnt_bias = INT_MAX;
 }
 
 static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
 {
 	struct gve_priv *priv = rx->gve;
 	u32 slots;
+	int err;
 	int i;
 
 	/* Allocate one page per Rx queue slot. Each page is split into two
@@ -71,12 +101,32 @@
 	if (!rx->data.page_info)
 		return -ENOMEM;
 
-	rx->data.qpl = gve_assign_rx_qpl(priv);
-
+	if (!rx->data.raw_addressing)
+		rx->data.qpl = gve_assign_rx_qpl(priv);
 	for (i = 0; i < slots; i++) {
-		struct page *page = rx->data.qpl->pages[i];
-		dma_addr_t addr = i * PAGE_SIZE;
+		struct page *page;
+		dma_addr_t addr;
 
+		if (rx->data.raw_addressing) {
+			err = gve_alloc_page(priv, &priv->pdev->dev, &page,
+					     &addr, DMA_FROM_DEVICE,
+							 GFP_KERNEL);
+			if (err) {
+				int j;
+
+				u64_stats_update_begin(&rx->statss);
+				rx->rx_buf_alloc_fail++;
+				u64_stats_update_end(&rx->statss);
+				for (j = 0; j < i; j++)
+					gve_rx_free_buffer(&priv->pdev->dev,
+							 &rx->data.page_info[j],
+							 &rx->data.data_ring[j]);
+				return err;
+			}
+		} else {
+			page = rx->data.qpl->pages[i];
+			addr = i * PAGE_SIZE;
+		}
 		gve_setup_rx_buffer(&rx->data.page_info[i],
 				    &rx->data.data_ring[i], addr, page);
 	}
@@ -110,8 +160,9 @@
 	rx->gve = priv;
 	rx->q_num = idx;
 
-	slots = priv->rx_pages_per_qpl;
+	slots = priv->rx_data_slot_cnt;
 	rx->mask = slots - 1;
+	rx->data.raw_addressing = priv->raw_addressing;
 
 	/* alloc rx data ring */
 	bytes = sizeof(*rx->data.data_ring) * slots;
@@ -156,8 +207,8 @@
 		err = -ENOMEM;
 		goto abort_with_q_resources;
 	}
-	rx->mask = slots - 1;
 	rx->cnt = 0;
+	rx->db_threshold = priv->rx_desc_cnt / 2;
 	rx->desc.seqno = 1;
 	gve_rx_add_to_block(priv, idx);
 
@@ -168,7 +219,7 @@
 			  rx->q_resources, rx->q_resources_bus);
 	rx->q_resources = NULL;
 abort_filled:
-	kvfree(rx->data.page_info);
+	gve_rx_unfill_pages(priv, rx);
 abort_with_slots:
 	bytes = sizeof(*rx->data.data_ring) * slots;
 	dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
@@ -242,11 +293,11 @@
 	skb_copy_to_linear_data(skb, va, len);
 
 	skb->protocol = eth_type_trans(skb, dev);
+
 	return skb;
 }
 
-static struct sk_buff *gve_rx_add_frags(struct net_device *dev,
-					struct napi_struct *napi,
+static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
 					struct gve_rx_slot_page_info *page_info,
 					u16 len)
 {
@@ -262,14 +313,135 @@
 	return skb;
 }
 
-static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info,
-			     struct gve_rx_data_slot *data_ring)
+static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
+			       struct gve_rx_slot_page_info *page_info,
+			       struct gve_rx_data_slot *data_slot,
+			       struct gve_rx_ring *rx)
 {
-	u64 addr = be64_to_cpu(data_ring->qpl_offset);
+	struct page *page;
+	dma_addr_t dma;
+	int err;
 
+	err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE,
+									GFP_ATOMIC);
+	if (err) {
+		u64_stats_update_begin(&rx->statss);
+		rx->rx_buf_alloc_fail++;
+		u64_stats_update_end(&rx->statss);
+		return err;
+	}
+
+	gve_setup_rx_buffer(page_info, data_slot, dma, page);
+	return 0;
+}
+
+static void gve_rx_flip_buffer(struct gve_rx_slot_page_info *page_info,
+			       struct gve_rx_data_slot *data_slot)
+{
+	u64 addr = be64_to_cpu(data_slot->addr);
+
+	/* "flip" to other packet buffer on this page */
 	page_info->page_offset ^= PAGE_SIZE / 2;
 	addr ^= PAGE_SIZE / 2;
-	data_ring->qpl_offset = cpu_to_be64(addr);
+	data_slot->addr = cpu_to_be64(addr);
+}
+
+/* Tell whether an RX page can be used as two half-page packet buffers.
+ *
+ * Returns true only when PAGE_SIZE is 4096 and a packet of the device's
+ * maximum MTU, plus the RX pad and Ethernet header, fits in half a page.
+ */
+static bool gve_rx_can_flip_buffers(struct net_device *netdev)
+{
+#if PAGE_SIZE != 4096
+	/* PAGE_SIZE != 4096 - don't try to reuse */
+	return false;
+#else
+	/* A buffer can only be flipped if a full-size packet fits into half
+	 * a page.
+	 */
+	return netdev->max_mtu + GVE_RX_PAD + ETH_HLEN <= PAGE_SIZE / 2;
+#endif
+}
+
+/* Compare the page's reference count with the driver's bias.
+ *
+ * Returns 1 when the count equals the bias (no SKB holds the page, so it can
+ * be reused), 0 when the count is higher (an SKB still holds it), and -1,
+ * after a WARN, when the count is lower than the bias.
+ */
+static int gve_rx_can_recycle_buffer(struct gve_rx_slot_page_info *page_info)
+{
+	int refs = page_count(page_info->page);
+
+	/* This page is still being used by an SKB - we can't reuse */
+	if (refs > page_info->pagecnt_bias)
+		return 0;
+
+	/* This page is not being used by any SKBs - reuse */
+	if (refs == page_info->pagecnt_bias)
+		return 1;
+
+	WARN(refs < page_info->pagecnt_bias,
+	     "Pagecount should never be less than the bias.");
+	return -1;
+}
+
+/* Hand one of the driver's bias references over to the stack.
+ *
+ * The bias is decremented; once it reaches zero the page's reference count
+ * is raised back to INT_MAX and the bias is reset to INT_MAX minus the
+ * references that were outstanding at that moment.
+ */
+static void gve_rx_update_pagecnt_bias(struct gve_rx_slot_page_info *page_info)
+{
+	int refs;
+
+	if (--page_info->pagecnt_bias != 0)
+		return;
+
+	/* Out of bias: read the count once and use the same value for both
+	 * the new bias and the top-up so the two stay in step.
+	 */
+	refs = page_count(page_info->page);
+	page_info->pagecnt_bias = INT_MAX - refs;
+	page_ref_add(page_info->page, INT_MAX - refs);
+}
+
+/* Build an skb for a raw-addressing RX buffer by attaching the page as a
+ * fragment.
+ *
+ * On success one bias reference is handed to the stack and @can_flip is
+ * recorded in the slot for the later refill pass. Returns NULL if no skb
+ * could be obtained. @dev, @netdev and @data_slot are not used here.
+ */
+static struct sk_buff *
+gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
+		      struct gve_rx_slot_page_info *page_info, u16 len,
+		      struct napi_struct *napi,
+		      struct gve_rx_data_slot *data_slot, bool can_flip)
+{
+	struct sk_buff *skb;
+
+	skb = gve_rx_add_frags(napi, page_info, len);
+	if (skb) {
+		/* Optimistically stop the kernel from freeing the page; the
+		 * refill pass checks again whether a new page is needed.
+		 */
+		gve_rx_update_pagecnt_bias(page_info);
+		page_info->can_flip = can_flip;
+	}
+
+	return skb;
+}
+
+/* Build an skb for a QPL (registered segment) RX buffer.
+ *
+ * Without raw addressing the device can only receive into registered
+ * segments. When @recycle is set the page is attached as a fragment and the
+ * slot is flipped to the other half of the page; otherwise the data is
+ * copied out so the buffer can go straight back to the device, and the copy
+ * is counted in rx->rx_copied_pkt. Returns NULL if no skb could be obtained.
+ * @dev is not used here.
+ */
+static struct sk_buff *
+gve_rx_qpl(struct device *dev, struct net_device *netdev,
+	   struct gve_rx_ring *rx, struct gve_rx_slot_page_info *page_info,
+	   u16 len, struct napi_struct *napi,
+	   struct gve_rx_data_slot *data_slot, bool recycle)
+{
+	struct sk_buff *skb;
+
+	if (!recycle) {
+		skb = gve_rx_copy(netdev, napi, page_info, len);
+		if (!skb)
+			return NULL;
+
+		u64_stats_update_begin(&rx->statss);
+		rx->rx_copied_pkt++;
+		u64_stats_update_end(&rx->statss);
+		return skb;
+	}
+
+	skb = gve_rx_add_frags(napi, page_info, len);
+	/* No point in recycling if we didn't get the skb */
+	if (skb) {
+		/* Make sure the networking stack can't free the page */
+		gve_rx_update_pagecnt_bias(page_info);
+		gve_rx_flip_buffer(page_info, data_slot);
+	}
+	return skb;
 }
 
 static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
@@ -278,67 +450,68 @@
 	struct gve_rx_slot_page_info *page_info;
 	struct gve_priv *priv = rx->gve;
 	struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
-	struct net_device *dev = priv->dev;
-	struct sk_buff *skb;
-	int pagecount;
+	struct net_device *netdev = priv->dev;
+	struct gve_rx_data_slot *data_slot;
+	struct sk_buff *skb = NULL;
+	dma_addr_t page_bus;
 	u16 len;
 
 	/* drop this packet */
-	if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR))
-		return true;
+	if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) {
+		u64_stats_update_begin(&rx->statss);
+		rx->rx_desc_err_dropped_pkt++;
+		u64_stats_update_end(&rx->statss);
+		return false;
+	}
 
 	len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
 	page_info = &rx->data.page_info[idx];
-	dma_sync_single_for_cpu(&priv->pdev->dev, rx->data.qpl->page_buses[idx],
+
+	data_slot = &rx->data.data_ring[idx];
+	page_bus = (rx->data.raw_addressing) ?
+					be64_to_cpu(data_slot->addr) - page_info->page_offset:
+					rx->data.qpl->page_buses[idx];									
+	dma_sync_single_for_cpu(&priv->pdev->dev, page_bus,
 				PAGE_SIZE, DMA_FROM_DEVICE);
 
-	/* gvnic can only receive into registered segments. If the buffer
-	 * can't be recycled, our only choice is to copy the data out of
-	 * it so that we can return it to the device.
-	 */
-
-	if (PAGE_SIZE == 4096) {
-		if (len <= priv->rx_copybreak) {
-			/* Just copy small packets */
-			skb = gve_rx_copy(dev, napi, page_info, len);
-			goto have_skb;
-		}
-		if (unlikely(!gve_can_recycle_pages(dev))) {
-			skb = gve_rx_copy(dev, napi, page_info, len);
-			goto have_skb;
-		}
-		pagecount = page_count(page_info->page);
-		if (pagecount == 1) {
-			/* No part of this page is used by any SKBs; we attach
-			 * the page fragment to a new SKB and pass it up the
-			 * stack.
-			 */
-			skb = gve_rx_add_frags(dev, napi, page_info, len);
-			if (!skb)
-				return true;
-			/* Make sure the kernel stack can't release the page */
-			get_page(page_info->page);
-			/* "flip" to other packet buffer on this page */
-			gve_rx_flip_buff(page_info, &rx->data.data_ring[idx]);
-		} else if (pagecount >= 2) {
-			/* We have previously passed the other half of this
-			 * page up the stack, but it has not yet been freed.
-			 */
-			skb = gve_rx_copy(dev, napi, page_info, len);
-		} else {
-			WARN(pagecount < 1, "Pagecount should never be < 1");
-			return false;
-		}
+	if (len <= priv->rx_copybreak) {
+		/* Just copy small packets */
+		skb = gve_rx_copy(netdev, napi, page_info, len);
+		if (skb) {
+				u64_stats_update_begin(&rx->statss);
+				rx->rx_copied_pkt++;
+				rx->rx_copybreak_pkt++;
+				u64_stats_update_end(&rx->statss);
+			}
 	} else {
-		skb = gve_rx_copy(dev, napi, page_info, len);
+		bool can_flip = gve_rx_can_flip_buffers(netdev);
+		int recycle = 0;
+
+		if (can_flip) {
+			recycle = gve_rx_can_recycle_buffer(page_info);
+			if (recycle < 0) {
+				gve_schedule_reset(priv);
+				return false;
+			}
+		}
+		if (rx->data.raw_addressing) {
+			skb = gve_rx_raw_addressing(&priv->pdev->dev, netdev,
+						    page_info, len, napi,
+						    data_slot,
+						    can_flip && recycle);
+		} else {
+			skb = gve_rx_qpl(&priv->pdev->dev, netdev, rx,
+					 page_info, len, napi, data_slot,
+					 can_flip && recycle);
+		}
 	}
 
-have_skb:
-	/* We didn't manage to allocate an skb but we haven't had any
-	 * reset worthy failures.
-	 */
-	if (!skb)
-		return true;
+	if (!skb) {
+		u64_stats_update_begin(&rx->statss);
+		rx->rx_skb_alloc_fail++;
+		u64_stats_update_end(&rx->statss);
+ 		return false;
+	}
 
 	if (likely(feat & NETIF_F_RXCSUM)) {
 		/* NIC passes up the partial sum */
@@ -359,6 +532,7 @@
 		napi_gro_frags(napi);
 	else
 		napi_gro_receive(napi, skb);
+
 	return true;
 }
 
@@ -371,26 +545,80 @@
 	next_idx = rx->cnt & rx->mask;
 	desc = rx->desc.desc_ring + next_idx;
 
+	/* make sure we have synchronized the seq no with the device */
+	smp_mb();
 	flags_seq = desc->flags_seq;
-	/* Make sure we have synchronized the seq no with the device */
-	smp_rmb();
+
 
 	return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
 }
 
+/* Restock consumed RX slots in raw-addressing mode.
+ *
+ * Walks the slots between rx->fill_cnt and a full ring ahead of rx->cnt. Each
+ * slot is flipped to the other half of its page, reused as is, or given a
+ * newly allocated page. Stops early, keeping what was refilled so far, if a
+ * page allocation fails.
+ *
+ * Returns false (after scheduling a reset) if a page's reference count is
+ * found to be below the bias, true otherwise.
+ */
+static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
+{
+	u32 refill_target = rx->mask + 1;
+	u32 fill_cnt = rx->fill_cnt;
+
+	/* Compare the counters themselves rather than their masked values: a
+	 * completely drained ring (fill_cnt == cnt) and a completely full one
+	 * (fill_cnt == cnt + slots) mask to the same index, and the drained
+	 * ring must still be refilled.
+	 */
+	while (fill_cnt - rx->cnt < refill_target) {
+		u32 idx = fill_cnt & rx->mask;
+		struct gve_rx_slot_page_info *page_info =
+						&rx->data.page_info[idx];
+		struct gve_rx_data_slot *data_slot = &rx->data.data_ring[idx];
+
+		if (page_info->can_flip) {
+			/* The other half of the page is free because it was
+			 * free when we processed the descriptor. Flip to it.
+			 */
+			gve_rx_flip_buffer(page_info, data_slot);
+			page_info->can_flip = false;
+		} else {
+			/* It is possible that the networking stack has already
+			 * finished processing all outstanding packets in the
+			 * buffer and it can be reused.
+			 * Flipping is unnecessary here - if the networking
+			 * stack still owns half the page it is impossible to
+			 * tell which half. Either the whole page is free or it
+			 * needs to be replaced.
+			 */
+			int recycle = gve_rx_can_recycle_buffer(page_info);
+
+			if (recycle < 0) {
+				gve_schedule_reset(priv);
+				return false;
+			}
+			if (!recycle) {
+				/* We can't reuse the buffer - alloc a new one.
+				 * Allocate before freeing: on failure the slot
+				 * is left untouched and still owns its old
+				 * page, so a later refill or teardown never
+				 * sees a slot without a page.
+				 */
+				struct gve_rx_slot_page_info old_info =
+								*page_info;
+				struct gve_rx_data_slot old_slot = *data_slot;
+				struct device *dev = &priv->pdev->dev;
+
+				if (gve_rx_alloc_buffer(priv, dev, page_info,
+							data_slot, rx))
+					break;
+				gve_rx_free_buffer(dev, &old_info, &old_slot);
+			}
+		}
+		fill_cnt++;
+	}
+	rx->fill_cnt = fill_cnt;
+	return true;
+}
+
 bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
 		       netdev_features_t feat)
 {
 	struct gve_priv *priv = rx->gve;
+	u32 work_done = 0, packets = 0;
 	struct gve_rx_desc *desc;
 	u32 cnt = rx->cnt;
 	u32 idx = cnt & rx->mask;
-	u32 work_done = 0;
 	u64 bytes = 0;
 
 	desc = rx->desc.desc_ring + idx;
 	while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
 	       work_done < budget) {
+		bool dropped;
 		netif_info(priv, rx_status, priv->dev,
 			   "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
 			   rx->q_num, idx, desc, desc->flags_seq);
@@ -398,9 +626,11 @@
 			   "[%d] seqno=%d rx->desc.seqno=%d\n",
 			   rx->q_num, GVE_SEQNO(desc->flags_seq),
 			   rx->desc.seqno);
-		bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
-		if (!gve_rx(rx, desc, feat, idx))
-			gve_schedule_reset(priv);
+		dropped = !gve_rx(rx, desc, feat, idx);
+		if (!dropped) {
+			bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
+			packets++;
+		}
 		cnt++;
 		idx = cnt & rx->mask;
 		desc = rx->desc.desc_ring + idx;
@@ -412,13 +642,27 @@
 		return false;
 
 	u64_stats_update_begin(&rx->statss);
-	rx->rpackets += work_done;
+	rx->rpackets += packets;
 	rx->rbytes += bytes;
 	u64_stats_update_end(&rx->statss);
 	rx->cnt = cnt;
-	rx->fill_cnt += work_done;
+	/* restock ring slots */
+	if (!rx->data.raw_addressing) {
+		/* In QPL mode buffs are refilled as the desc are processed */
+		rx->fill_cnt += work_done;
+		dma_wmb();/* Ensure descs are visible before ringing doorbell */
+		gve_rx_write_doorbell(priv, rx);
+	} else if (rx->fill_cnt - cnt <= rx->db_threshold) {
+		/* In raw addressing mode buffs are only refilled if the avail
+		 * falls below a threshold.
+		 */
+		if(!gve_rx_refill_buffers(priv, rx))
+			return false;
+		/* restock desc ring slots */
+		dma_wmb();/* Ensure descs are visible before ringing doorbell */
+		gve_rx_write_doorbell(priv, rx);
+	}
 
-	gve_rx_write_doorbell(priv, rx);
 	return gve_rx_work_pending(rx);
 }
 

diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
index b653197..c5b8db9 100644
--- a/drivers/net/ethernet/google/gve/gve_tx.c
+++ b/drivers/net/ethernet/google/gve/gve_tx.c

@@ -158,9 +158,11 @@
 			  tx->q_resources, tx->q_resources_bus);
 	tx->q_resources = NULL;
 
-	gve_tx_fifo_release(priv, &tx->tx_fifo);
-	gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
-	tx->tx_fifo.qpl = NULL;
+	if (!tx->raw_addressing) {
+		gve_tx_fifo_release(priv, &tx->tx_fifo);
+		gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
+		tx->tx_fifo.qpl = NULL;
+	}
 
 	bytes = sizeof(*tx->desc) * slots;
 	dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
@@ -174,12 +176,16 @@
 
 static void gve_tx_add_to_block(struct gve_priv *priv, int queue_idx)
 {
+	unsigned int active_cpus = min_t(int, priv->num_ntfy_blks / 2,
+					 num_online_cpus());
 	int ntfy_idx = gve_tx_idx_to_ntfy(priv, queue_idx);
 	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
 	struct gve_tx_ring *tx = &priv->tx[queue_idx];
 
 	block->tx = tx;
 	tx->ntfy_id = ntfy_idx;
+	netif_set_xps_queue(priv->dev, get_cpu_mask(ntfy_idx % active_cpus),
+			    queue_idx);
 }
 
 static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
@@ -206,13 +212,18 @@
 	if (!tx->desc)
 		goto abort_with_info;
 
-	tx->tx_fifo.qpl = gve_assign_tx_qpl(priv);
-	if (!tx->tx_fifo.qpl)
-		goto abort_with_desc;
+	tx->raw_addressing = priv->raw_addressing;
+	tx->dev = &priv->pdev->dev;
+	if (!tx->raw_addressing) {
+		tx->tx_fifo.qpl = gve_assign_tx_qpl(priv);
 
-	/* map Tx FIFO */
-	if (gve_tx_fifo_init(priv, &tx->tx_fifo))
-		goto abort_with_qpl;
+		if (!tx->tx_fifo.qpl)
+			goto abort_with_desc;
+
+		/* map Tx FIFO */
+		if (gve_tx_fifo_init(priv, &tx->tx_fifo))
+			goto abort_with_qpl;
+	}
 
 	tx->q_resources =
 		dma_alloc_coherent(hdev,
@@ -230,9 +241,11 @@
 	return 0;
 
 abort_with_fifo:
-	gve_tx_fifo_release(priv, &tx->tx_fifo);
+	if (!tx->raw_addressing)
+		gve_tx_fifo_release(priv, &tx->tx_fifo);
 abort_with_qpl:
-	gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
+	if (!tx->raw_addressing)
+		gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
 abort_with_desc:
 	dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
 	tx->desc = NULL;
@@ -310,22 +323,44 @@
  * payload wraps to the beginning of the FIFO.
  */
 #define MAX_TX_DESC_NEEDED	3
+/* Undo the DMA mapping recorded in @buf, if any.
+ *
+ * The sign of the stored length selects the unmap call: a positive length is
+ * undone with dma_unmap_single(), a negative one with dma_unmap_page() on the
+ * negated length. A zero length means nothing is mapped. The stored length is
+ * cleared afterwards so the buffer is not unmapped twice.
+ */
+static void gve_tx_unmap_buf(struct device *dev,
+			     struct gve_tx_dma_buf *buf)
+{
+	const int signed_len = (int)dma_unmap_len(buf, len);
+
+	if (signed_len == 0)
+		return;
+
+	if (signed_len > 0)
+		dma_unmap_single(dev, dma_unmap_addr(buf, dma),
+				 dma_unmap_len(buf, len),
+				 DMA_TO_DEVICE);
+	else
+		dma_unmap_page(dev, dma_unmap_addr(buf, dma),
+			       -dma_unmap_len(buf, len),
+			       DMA_TO_DEVICE);
+
+	dma_unmap_len_set(buf, len, 0);
+}
 
 /* Check if sufficient resources (descriptor ring space, FIFO space) are
  * available to transmit the given number of bytes.
  */
 static inline bool gve_can_tx(struct gve_tx_ring *tx, int bytes_required)
 {
-	return (gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED &&
-		gve_tx_fifo_can_alloc(&tx->tx_fifo, bytes_required));
+	bool can_alloc = true;
+
+	if (!tx->raw_addressing)
+		can_alloc = gve_tx_fifo_can_alloc(&tx->tx_fifo, bytes_required);
+
+	return (gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED && can_alloc);
 }
 
 /* Stops the queue if the skb cannot be transmitted. */
 static int gve_maybe_stop_tx(struct gve_tx_ring *tx, struct sk_buff *skb)
 {
-	int bytes_required;
+	int bytes_required = 0;
 
-	bytes_required = gve_skb_fifo_bytes_required(tx, skb);
+	if (!tx->raw_addressing)
+		bytes_required = gve_skb_fifo_bytes_required(tx, skb);
+
 	if (likely(gve_can_tx(tx, bytes_required)))
 		return 0;
 
@@ -394,22 +429,23 @@
 	seg_desc->seg.seg_addr = cpu_to_be64(addr);
 }
 
-static void gve_dma_sync_for_device(struct device *dev, dma_addr_t *page_buses,
-				    u64 iov_offset, u64 iov_len)
+static void gve_dma_sync_for_device(struct gve_priv *priv,
+								dma_addr_t *page_buses,
+								u64 iov_offset, u64 iov_len)
 {
 	u64 last_page = (iov_offset + iov_len - 1) / PAGE_SIZE;
 	u64 first_page = iov_offset / PAGE_SIZE;
-	dma_addr_t dma;
 	u64 page;
 
 	for (page = first_page; page <= last_page; page++) {
-		dma = page_buses[page];
-		dma_sync_single_for_device(dev, dma, PAGE_SIZE, DMA_TO_DEVICE);
+		dma_addr_t dma = page_buses[page];
+		dma_sync_single_for_device(&priv->pdev->dev, dma, PAGE_SIZE,
+					   DMA_TO_DEVICE);
 	}
 }
 
-static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb,
-			  struct device *dev)
+static int gve_tx_add_skb_copy(struct gve_priv* priv, struct gve_tx_ring *tx,
+								struct sk_buff *skb)
 {
 	int pad_bytes, hlen, hdr_nfrags, payload_nfrags, l4_hdr_offset;
 	union gve_tx_desc *pkt_desc, *seg_desc;
@@ -451,7 +487,7 @@
 	skb_copy_bits(skb, 0,
 		      tx->tx_fifo.base + info->iov[hdr_nfrags - 1].iov_offset,
 		      hlen);
-	gve_dma_sync_for_device(dev, tx->tx_fifo.qpl->page_buses,
+	gve_dma_sync_for_device(priv, tx->tx_fifo.qpl->page_buses,
 				info->iov[hdr_nfrags - 1].iov_offset,
 				info->iov[hdr_nfrags - 1].iov_len);
 	copy_offset = hlen;
@@ -467,7 +503,7 @@
 		skb_copy_bits(skb, copy_offset,
 			      tx->tx_fifo.base + info->iov[i].iov_offset,
 			      info->iov[i].iov_len);
-		gve_dma_sync_for_device(dev, tx->tx_fifo.qpl->page_buses,
+		gve_dma_sync_for_device(priv, tx->tx_fifo.qpl->page_buses,
 					info->iov[i].iov_offset,
 					info->iov[i].iov_len);
 		copy_offset += info->iov[i].iov_len;
@@ -476,6 +512,96 @@
 	return 1 + payload_nfrags;
 }
 
+/* Queue @skb on @tx in raw-addressing mode by DMA-mapping its data in place
+ * instead of copying it into the FIFO.
+ *
+ * The linear area is mapped with dma_map_single() and each page fragment with
+ * skb_frag_dma_map(). Every mapping is recorded in the matching
+ * tx->info[].buf; fragment lengths are stored negated so gve_tx_unmap_buf()
+ * can tell page mappings from single mappings.
+ *
+ * Returns the number of descriptors written, or 0 if a mapping failed. On
+ * failure every mapping made so far is undone, the drop is counted and the
+ * skb is freed here: gve_tx() does not touch the skb when 0 is returned, and
+ * since tx->req is not advanced the completion path never frees it either.
+ */
+static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
+				  struct sk_buff *skb)
+{
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
+	int hlen, payload_nfrags, l4_hdr_offset, seg_idx_bias;
+	union gve_tx_desc *pkt_desc, *seg_desc;
+	struct gve_tx_buffer_state *info;
+	bool is_gso = skb_is_gso(skb);
+	u32 idx = tx->req & tx->mask;
+	struct gve_tx_dma_buf *buf;
+	int last_mapped = 0;
+	u64 addr;
+	u32 len;
+	int i;
+
+	info = &tx->info[idx];
+	pkt_desc = &tx->desc[idx];
+
+	l4_hdr_offset = skb_checksum_start_offset(skb);
+	/* If the skb is gso, then we want the tcp header in the first segment
+	 * otherwise we want the linear portion of the skb (which will contain
+	 * the checksum because skb->csum_start and skb->csum_offset are given
+	 * relative to skb->head) in the first segment.
+	 */
+	hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) :
+			skb_headlen(skb);
+	len = skb_headlen(skb);
+
+	info->skb = skb;
+
+	addr = dma_map_single(tx->dev, skb->data, len, DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(tx->dev, addr))) {
+		priv->dma_mapping_error++;
+		goto drop;
+	}
+	buf = &info->buf;
+	dma_unmap_len_set(buf, len, len);
+	dma_unmap_addr_set(buf, dma, addr);
+
+	payload_nfrags = shinfo->nr_frags;
+	if (hlen < len) {
+		/* For gso the rest of the linear portion of the skb needs to
+		 * be in its own descriptor.
+		 */
+		payload_nfrags++;
+		gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
+				     1 + payload_nfrags, hlen, addr);
+
+		len -= hlen;
+		addr += hlen;
+		seg_desc = &tx->desc[(tx->req + 1) & tx->mask];
+		seg_idx_bias = 2;
+		gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
+	} else {
+		seg_idx_bias = 1;
+		gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
+				     1 + payload_nfrags, hlen, addr);
+	}
+
+	/* One descriptor per page fragment, placed after the packet
+	 * descriptor and, when present, the extra linear-area descriptor.
+	 */
+	for (i = 0; i < payload_nfrags - (seg_idx_bias - 1); i++) {
+		const skb_frag_t *frag = &shinfo->frags[i];
+
+		idx = (tx->req + i + seg_idx_bias) & tx->mask;
+		seg_desc = &tx->desc[idx];
+		len = skb_frag_size(frag);
+		addr = skb_frag_dma_map(tx->dev, frag, 0, len, DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(tx->dev, addr))) {
+			priv->dma_mapping_error++;
+			goto unmap_drop;
+		}
+		buf = &tx->info[idx].buf;
+		/* Negative length marks a page mapping for the unmap path */
+		dma_unmap_len_set(buf, len, -len);
+		dma_unmap_addr_set(buf, dma, addr);
+
+		gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
+	}
+
+	return 1 + payload_nfrags;
+
+unmap_drop:
+	/* Fragment i failed; walk back from the last fragment that was mapped
+	 * down to the packet descriptor. Slots with no recorded mapping are
+	 * ignored by gve_tx_unmap_buf().
+	 */
+	i--;
+	for (last_mapped = i + seg_idx_bias; last_mapped >= 0; last_mapped--) {
+		idx = (tx->req + last_mapped) & tx->mask;
+		gve_tx_unmap_buf(tx->dev, &tx->info[idx].buf);
+	}
+drop:
+	/* Nothing was queued, so no completion will ever hand this skb back.
+	 * Forget the stale pointer and release the skb here to avoid a leak.
+	 */
+	info->skb = NULL;
+	tx->dropped_pkt++;
+	dev_kfree_skb_any(skb);
+	return 0;
+}
+
 netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
 {
 	struct gve_priv *priv = netdev_priv(dev);
@@ -491,20 +617,34 @@
 		 * may have added descriptors without ringing the doorbell.
 		 */
 
+		/* Ensure tx descs from a prior gve_tx are visible before
+		 * ringing doorbell.
+		 */
+		dma_wmb();
 		gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
 		return NETDEV_TX_BUSY;
 	}
-	nsegs = gve_tx_add_skb(tx, skb, &priv->pdev->dev);
+	if (tx->raw_addressing)
+		nsegs = gve_tx_add_skb_no_copy(priv, tx, skb);
+	else
+		nsegs = gve_tx_add_skb_copy(priv, tx, skb);
 
-	netdev_tx_sent_queue(tx->netdev_txq, skb->len);
-	skb_tx_timestamp(skb);
+	/* If the packet is getting sent, we need to update the skb */
+	if (nsegs) {
+		netdev_tx_sent_queue(tx->netdev_txq, skb->len);
+		skb_tx_timestamp(skb);
+	}
 
-	/* give packets to NIC */
+	/* Give packets to NIC. Even if this packet failed to send the doorbell
+	 * might need to be rung because of xmit_more.
+	 */
 	tx->req += nsegs;
 
 	if (!netif_xmit_stopped(tx->netdev_txq) && netdev_xmit_more())
 		return NETDEV_TX_OK;
 
+	/* Ensure tx descs are visible before ringing doorbell */
+	dma_wmb();
 	gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
 	return NETDEV_TX_OK;
 }
@@ -529,24 +669,31 @@
 		info = &tx->info[idx];
 		skb = info->skb;
 
+		/* Unmap the buffer */
+		if (tx->raw_addressing)
+			gve_tx_unmap_buf(tx->dev, &tx->info[idx].buf);
 		/* Mark as free */
 		if (skb) {
 			info->skb = NULL;
 			bytes += skb->len;
 			pkts++;
 			dev_consume_skb_any(skb);
-			/* FIFO free */
-			for (i = 0; i < ARRAY_SIZE(info->iov); i++) {
-				space_freed += info->iov[i].iov_len +
-					       info->iov[i].iov_padding;
-				info->iov[i].iov_len = 0;
-				info->iov[i].iov_padding = 0;
+			if (!tx->raw_addressing) {
+				/* FIFO free */
+				for (i = 0; i < ARRAY_SIZE(info->iov); i++) {
+					space_freed += info->iov[i].iov_len +
+						       info->iov[i].iov_padding;
+					info->iov[i].iov_len = 0;
+					info->iov[i].iov_padding = 0;
+				}
 			}
 		}
 		tx->done++;
 	}
 
-	gve_tx_free_fifo(&tx->tx_fifo, space_freed);
+	if (!tx->raw_addressing) {
+		gve_tx_free_fifo(&tx->tx_fifo, space_freed);
+	}
 	u64_stats_update_begin(&tx->statss);
 	tx->bytes_done += bytes;
 	tx->pkt_done += pkts;

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 1e44482..fb86ab8 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c

@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/balloon_compaction.h>
+#include <linux/oom.h>
 #include <linux/wait.h>
 #include <linux/mm.h>
 #include <linux/mount.h>
@@ -27,7 +28,9 @@
  */
 #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
 #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
-#define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
+/* Maximum number of (4k) pages to deflate on OOM notifications. */
+#define VIRTIO_BALLOON_OOM_NR_PAGES 256
+#define VIRTIO_BALLOON_OOM_NOTIFY_PRIORITY 80
 
 #define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
 					     __GFP_NOMEMALLOC)
@@ -111,8 +114,11 @@
 	/* Memory statistics */
 	struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
 
-	/* To register a shrinker to shrink memory upon memory pressure */
+	/* Shrinker to return free pages - VIRTIO_BALLOON_F_FREE_PAGE_HINT */
 	struct shrinker shrinker;
+
+	/* OOM notifier to deflate on OOM - VIRTIO_BALLOON_F_DEFLATE_ON_OOM */
+	struct notifier_block oom_nb;
 };
 
 static struct virtio_device_id id_table[] = {
@@ -791,50 +797,13 @@
 	return blocks_freed << VIRTIO_BALLOON_FREE_PAGE_ORDER;
 }
 
-static unsigned long leak_balloon_pages(struct virtio_balloon *vb,
-                                          unsigned long pages_to_free)
-{
-	return leak_balloon(vb, pages_to_free * VIRTIO_BALLOON_PAGES_PER_PAGE) /
-		VIRTIO_BALLOON_PAGES_PER_PAGE;
-}
-
-static unsigned long shrink_balloon_pages(struct virtio_balloon *vb,
-					  unsigned long pages_to_free)
-{
-	unsigned long pages_freed = 0;
-
-	/*
-	 * One invocation of leak_balloon can deflate at most
-	 * VIRTIO_BALLOON_ARRAY_PFNS_MAX balloon pages, so we call it
-	 * multiple times to deflate pages till reaching pages_to_free.
-	 */
-	while (vb->num_pages && pages_freed < pages_to_free)
-		pages_freed += leak_balloon_pages(vb,
-						  pages_to_free - pages_freed);
-
-	update_balloon_size(vb);
-
-	return pages_freed;
-}
-
 static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
 						  struct shrink_control *sc)
 {
-	unsigned long pages_to_free, pages_freed = 0;
 	struct virtio_balloon *vb = container_of(shrinker,
 					struct virtio_balloon, shrinker);
 
-	pages_to_free = sc->nr_to_scan;
-
-	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
-		pages_freed = shrink_free_pages(vb, pages_to_free);
-
-	if (pages_freed >= pages_to_free)
-		return pages_freed;
-
-	pages_freed += shrink_balloon_pages(vb, pages_to_free - pages_freed);
-
-	return pages_freed;
+	return shrink_free_pages(vb, sc->nr_to_scan);
 }
 
 static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
@@ -842,12 +811,22 @@
 {
 	struct virtio_balloon *vb = container_of(shrinker,
 					struct virtio_balloon, shrinker);
-	unsigned long count;
 
-	count = vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
-	count += vb->num_free_page_blocks << VIRTIO_BALLOON_FREE_PAGE_ORDER;
+	return vb->num_free_page_blocks << VIRTIO_BALLOON_FREE_PAGE_ORDER;
+}
 
-	return count;
+/* OOM notifier callback: deflate the balloon by up to
+ * VIRTIO_BALLOON_OOM_NR_PAGES balloon pages and add the number of pages
+ * released to the counter passed in @parm. Always returns NOTIFY_OK.
+ */
+static int virtio_balloon_oom_notify(struct notifier_block *nb,
+				     unsigned long dummy, void *parm)
+{
+	unsigned long *freed = parm;
+	struct virtio_balloon *vb;
+	unsigned long deflated;
+
+	vb = container_of(nb, struct virtio_balloon, oom_nb);
+
+	/* leak_balloon() counts balloon pages; convert to kernel pages */
+	deflated = leak_balloon(vb, VIRTIO_BALLOON_OOM_NR_PAGES);
+	*freed += deflated / VIRTIO_BALLOON_PAGES_PER_PAGE;
+	update_balloon_size(vb);
+
+	return NOTIFY_OK;
 }
 
 static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb)
@@ -933,27 +912,37 @@
 						  VIRTIO_BALLOON_CMD_ID_STOP);
 		spin_lock_init(&vb->free_page_list_lock);
 		INIT_LIST_HEAD(&vb->free_page_list);
+		/*
+		 * We're allowed to reuse any free pages, even if they are
+		 * still to be processed by the host.
+		 */
+		err = virtio_balloon_register_shrinker(vb);
+		if (err)
+			goto out_del_balloon_wq;
 		if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
 			memset(&poison_val, PAGE_POISON, sizeof(poison_val));
 			virtio_cwrite(vb->vdev, struct virtio_balloon_config,
 				      poison_val, &poison_val);
 		}
 	}
-	/*
-	 * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a
-	 * shrinker needs to be registered to relieve memory pressure.
-	 */
+
 	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
-		err = virtio_balloon_register_shrinker(vb);
-		if (err)
-			goto out_del_balloon_wq;
+		vb->oom_nb.notifier_call = virtio_balloon_oom_notify;
+		vb->oom_nb.priority = VIRTIO_BALLOON_OOM_NOTIFY_PRIORITY;
+		err = register_oom_notifier(&vb->oom_nb);
+		if (err < 0)
+			goto out_unregister_shrinker;
 	}
+
 	virtio_device_ready(vdev);
 
 	if (towards_target(vb))
 		virtballoon_changed(vdev);
 	return 0;
 
+out_unregister_shrinker:
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+		virtio_balloon_unregister_shrinker(vb);
 out_del_balloon_wq:
 	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
 		destroy_workqueue(vb->balloon_wq);
@@ -993,6 +982,8 @@
 	struct virtio_balloon *vb = vdev->priv;
 
 	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
+		unregister_oom_notifier(&vb->oom_nb);
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
 		virtio_balloon_unregister_shrinker(vb);
 	spin_lock_irq(&vb->stop_update_lock);
 	vb->stop_update = true;

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 031ff3f..29f10d5 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c

@@ -412,7 +412,8 @@
  * Return buffer_head on success or NULL in case of failure.
  */
 struct buffer_head *
-ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
+ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group,
+			      bool ignore_locked)
 {
 	struct ext4_group_desc *desc;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -440,6 +441,12 @@
 		return ERR_PTR(-ENOMEM);
 	}
 
+	if (ignore_locked && buffer_locked(bh)) {
+		/* buffer under IO already, return if called for prefetching */
+		put_bh(bh);
+		return NULL;
+	}
+
 	if (bitmap_uptodate(bh))
 		goto verify;
 
@@ -486,10 +493,11 @@
 	 * submit the buffer_head for reading
 	 */
 	set_buffer_new(bh);
-	trace_ext4_read_block_bitmap_load(sb, block_group);
+	trace_ext4_read_block_bitmap_load(sb, block_group, ignore_locked);
 	bh->b_end_io = ext4_end_bitmap_read;
 	get_bh(bh);
-	submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh);
+	submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO |
+		  (ignore_locked ? REQ_RAHEAD : 0), bh);
 	return bh;
 verify:
 	err = ext4_validate_block_bitmap(sb, desc, block_group, bh);
@@ -532,7 +540,7 @@
 	struct buffer_head *bh;
 	int err;
 
-	bh = ext4_read_block_bitmap_nowait(sb, block_group);
+	bh = ext4_read_block_bitmap_nowait(sb, block_group, false);
 	if (IS_ERR(bh))
 		return bh;
 	err = ext4_wait_block_bitmap(sb, block_group, bh);

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ae2cb15..e6ad6fd 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h

@@ -198,6 +198,12 @@
  */
 #define	EXT4_IO_END_UNWRITTEN	0x0001
 
+/*
+ * One (offset, size) byte range attached to an ext4_io_end_t; entries are
+ * chained through ->list onto the owning io_end's list_vec.
+ */
+struct ext4_io_end_vec {
+	struct list_head list;		/* list of io_end_vec */
+	loff_t offset;			/* offset in the file */
+	ssize_t size;			/* size of the extent */
+};
+
 /*
  * For converting unwritten extents on a work queue. 'handle' is used for
  * buffered writeback.
@@ -211,8 +217,7 @@
 						 * bios covering the extent */
 	unsigned int		flag;		/* unwritten or not */
 	atomic_t		count;		/* reference counter */
-	loff_t			offset;		/* offset in the file */
-	ssize_t			size;		/* size of the extent */
+	struct list_head	list_vec;	/* list of ext4_io_end_vec */
 } ext4_io_end_t;
 
 struct ext4_io_submit {
@@ -1148,6 +1153,7 @@
 #define EXT4_MOUNT_JOURNAL_CHECKSUM	0x800000 /* Journal checksums */
 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */
 #define EXT4_MOUNT_WARN_ON_ERROR	0x2000000 /* Trigger WARN_ON on error */
+#define EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS 0x4000000
 #define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
 #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
 #define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
@@ -1481,6 +1487,8 @@
 	/* where last allocation was done - for stream allocation */
 	unsigned long s_mb_last_group;
 	unsigned long s_mb_last_start;
+	unsigned int s_mb_prefetch;
+	unsigned int s_mb_prefetch_limit;
 
 	/* stats for buddy allocator */
 	atomic_t s_bal_reqs;	/* number of reqs with len > 1 */
@@ -2221,9 +2229,15 @@
 	struct mutex		li_list_mtx;
 };
 
+/* What an ext4_li_request does each time ext4_run_li_request() runs it. */
+enum ext4_li_mode {
+	EXT4_LI_MODE_PREFETCH_BBITMAP,	/* prefetch block bitmaps */
+	EXT4_LI_MODE_ITABLE,		/* initialize inode tables */
+};
+
 struct ext4_li_request {
 	struct super_block	*lr_super;
-	struct ext4_sb_info	*lr_sbi;
+	enum ext4_li_mode	lr_mode;
+	ext4_group_t		lr_first_not_zeroed;
 	ext4_group_t		lr_next_group;
 	struct list_head	lr_request;
 	unsigned long		lr_next_sched;
@@ -2354,7 +2368,8 @@
 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
 
 extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb,
-						ext4_group_t block_group);
+						ext4_group_t block_group,
+						bool ignore_locked);
 extern int ext4_wait_block_bitmap(struct super_block *sb,
 				  ext4_group_t block_group,
 				  struct buffer_head *bh);
@@ -2563,6 +2578,12 @@
 extern void ext4_discard_preallocations(struct inode *);
 extern int __init ext4_init_mballoc(void);
 extern void ext4_exit_mballoc(void);
+extern ext4_group_t ext4_mb_prefetch(struct super_block *sb,
+				     ext4_group_t group,
+				     unsigned int nr, int *cnt);
+extern void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
+				  unsigned int nr);
+
 extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
 			     struct buffer_head *bh, ext4_fsblk_t block,
 			     unsigned long count, int flags);
@@ -3041,6 +3062,7 @@
 	(1 << EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT)
 #define EXT4_GROUP_INFO_IBITMAP_CORRUPT		\
 	(1 << EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT)
+#define EXT4_GROUP_INFO_BBITMAP_READ_BIT	4
 
 #define EXT4_MB_GRP_NEED_INIT(grp)	\
 	(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
@@ -3055,6 +3077,8 @@
 	(set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
 #define EXT4_MB_GRP_CLEAR_TRIMMED(grp)	\
 	(clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
+#define EXT4_MB_GRP_TEST_AND_SET_READ(grp)	\
+	(test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_READ_BIT, &((grp)->bb_state)))
 
 #define EXT4_MAX_CONTENTION		8
 #define EXT4_CONTENTION_THRESHOLD	2
@@ -3290,6 +3314,8 @@
 			  loff_t len);
 extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
 					  loff_t offset, ssize_t len);
+extern int ext4_convert_unwritten_io_end_vec(handle_t *handle,
+					     ext4_io_end_t *io_end);
 extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
 			   struct ext4_map_blocks *map, int flags);
 extern int ext4_ext_calc_metadata_amount(struct inode *inode,
@@ -3348,6 +3374,8 @@
 			       int len,
 			       struct writeback_control *wbc,
 			       bool keep_towrite);
+extern struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end);
+extern struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end);
 
 /* mmp.c */
 extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ae73e679..abc88d3 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c

@@ -4975,23 +4975,13 @@
 	int ret = 0;
 	int ret2 = 0;
 	struct ext4_map_blocks map;
-	unsigned int credits, blkbits = inode->i_blkbits;
+	unsigned int blkbits = inode->i_blkbits;
+	unsigned int credits = 0;
 
 	map.m_lblk = offset >> blkbits;
 	max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
 
-	/*
-	 * This is somewhat ugly but the idea is clear: When transaction is
-	 * reserved, everything goes into it. Otherwise we rather start several
-	 * smaller transactions for conversion of each extent separately.
-	 */
-	if (handle) {
-		handle = ext4_journal_start_reserved(handle,
-						     EXT4_HT_EXT_CONVERT);
-		if (IS_ERR(handle))
-			return PTR_ERR(handle);
-		credits = 0;
-	} else {
+	if (!handle) {
 		/*
 		 * credits to insert 1 extent into extent tree
 		 */
@@ -5022,11 +5012,40 @@
 		if (ret <= 0 || ret2)
 			break;
 	}
-	if (!credits)
-		ret2 = ext4_journal_stop(handle);
 	return ret > 0 ? ret2 : ret;
 }
 
+/*
+ * Convert every extent recorded on @io_end->list_vec to written.
+ *
+ * A non-NULL @handle is treated as a reserved handle: it is started here,
+ * shared by all conversions and stopped before returning.  Returns the
+ * conversion result if it is negative, otherwise the result of stopping
+ * the handle (0 when @handle is NULL).
+ */
+int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end)
+{
+	/* ret must start at 0: the loop below never runs on an empty list */
+	int ret = 0, err = 0;
+	struct ext4_io_end_vec *io_end_vec;
+
+	/*
+	 * This is somewhat ugly but the idea is clear: When transaction is
+	 * reserved, everything goes into it. Otherwise we rather start several
+	 * smaller transactions for conversion of each extent separately.
+	 */
+	if (handle) {
+		handle = ext4_journal_start_reserved(handle,
+						     EXT4_HT_EXT_CONVERT);
+		if (IS_ERR(handle))
+			return PTR_ERR(handle);
+	}
+
+	/* the first non-zero conversion result ends the walk */
+	list_for_each_entry(io_end_vec, &io_end->list_vec, list) {
+		ret = ext4_convert_unwritten_extents(handle, io_end->inode,
+						     io_end_vec->offset,
+						     io_end_vec->size);
+		if (ret)
+			break;
+	}
+
+	if (handle)
+		err = ext4_journal_stop(handle);
+
+	/* a conversion error takes precedence over the journal-stop status */
+	return ret < 0 ? ret : err;
+}
+
 /*
  * If newes is not existing extent (newes->ec_pblk equals zero) find
  * delayed extent at start of newes and update newes accordingly and

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 48b4673..076e2e3 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c

@@ -2364,6 +2364,79 @@
 }
 
 /*
+ * mpage_process_page - update page buffers corresponding to changed extent and
+ *		       may submit fully mapped page for IO
+ *
+ * @mpd		- description of extent to map, on return next extent to map
+ * @m_lblk	- logical block mapping.
+ * @m_pblk	- corresponding physical mapping.
+ * @map_bh	- determines on return whether this page requires any further
+ *		  mapping or not.
+ * Scan given page buffers corresponding to changed extent and update buffer
+ * state according to new extent state.
+ * We map delalloc buffers to their physical location, clear unwritten bits.
+ * If the given page is not fully mapped, we update @map to the next extent in
+ * the given page that needs mapping & return @map_bh as true.
+ */
+static int mpage_process_page(struct mpage_da_data *mpd, struct page *page,
+			      ext4_lblk_t *m_lblk, ext4_fsblk_t *m_pblk,
+			      bool *map_bh)
+{
+	struct buffer_head *head, *bh;
+	ext4_io_end_t *io_end = mpd->io_submit.io_end;
+	ext4_lblk_t lblk = *m_lblk;
+	ext4_fsblk_t pblock = *m_pblk;
+	int err = 0;
+	int blkbits = mpd->inode->i_blkbits;
+	/* bytes of this page handled so far but not yet added to io_end_vec */
+	ssize_t io_end_size = 0;
+	struct ext4_io_end_vec *io_end_vec = ext4_last_io_end_vec(io_end);
+
+	bh = head = page_buffers(page);
+	do {
+		if (lblk < mpd->map.m_lblk)
+			continue;
+		if (lblk >= mpd->map.m_lblk + mpd->map.m_len) {
+			/*
+			 * Buffer after end of mapped extent.
+			 * Find next buffer in the page to map.
+			 */
+			mpd->map.m_len = 0;
+			mpd->map.m_flags = 0;
+			io_end_vec->size += io_end_size;
+			io_end_size = 0;
+
+			err = mpage_process_page_bufs(mpd, head, bh, lblk);
+			if (err > 0)
+				err = 0;
+			if (!err && mpd->map.m_len && mpd->map.m_lblk > lblk) {
+				io_end_vec = ext4_alloc_io_end_vec(io_end);
+				if (IS_ERR(io_end_vec)) {
+					err = PTR_ERR(io_end_vec);
+					goto out;
+				}
+				/*
+				 * Widen to loff_t before shifting so the
+				 * byte offset is not truncated.
+				 */
+				io_end_vec->offset =
+					(loff_t)mpd->map.m_lblk << blkbits;
+			}
+			*map_bh = true;
+			goto out;
+		}
+		if (buffer_delay(bh)) {
+			clear_buffer_delay(bh);
+			bh->b_blocknr = pblock++;
+		}
+		clear_buffer_unwritten(bh);
+		io_end_size += (1 << blkbits);
+	} while (lblk++, (bh = bh->b_this_page) != head);
+
+	/* every buffer of the page was walked without leaving the extent */
+	io_end_vec->size += io_end_size;
+	io_end_size = 0;
+	*map_bh = false;
+out:
+	/* hand the advanced logical/physical positions back to the caller */
+	*m_lblk = lblk;
+	*m_pblk = pblock;
+	return err;
+}
+
+/*
  * mpage_map_buffers - update buffers corresponding to changed extent and
  *		       submit fully mapped pages for IO
  *
@@ -2382,12 +2455,12 @@
 	struct pagevec pvec;
 	int nr_pages, i;
 	struct inode *inode = mpd->inode;
-	struct buffer_head *head, *bh;
 	int bpp_bits = PAGE_SHIFT - inode->i_blkbits;
 	pgoff_t start, end;
 	ext4_lblk_t lblk;
-	sector_t pblock;
+	ext4_fsblk_t pblock;
 	int err;
+	bool map_bh = false;
 
 	start = mpd->map.m_lblk >> bpp_bits;
 	end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits;
@@ -2403,50 +2476,19 @@
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
 
-			bh = head = page_buffers(page);
-			do {
-				if (lblk < mpd->map.m_lblk)
-					continue;
-				if (lblk >= mpd->map.m_lblk + mpd->map.m_len) {
-					/*
-					 * Buffer after end of mapped extent.
-					 * Find next buffer in the page to map.
-					 */
-					mpd->map.m_len = 0;
-					mpd->map.m_flags = 0;
-					/*
-					 * FIXME: If dioread_nolock supports
-					 * blocksize < pagesize, we need to make
-					 * sure we add size mapped so far to
-					 * io_end->size as the following call
-					 * can submit the page for IO.
-					 */
-					err = mpage_process_page_bufs(mpd, head,
-								      bh, lblk);
-					pagevec_release(&pvec);
-					if (err > 0)
-						err = 0;
-					return err;
-				}
-				if (buffer_delay(bh)) {
-					clear_buffer_delay(bh);
-					bh->b_blocknr = pblock++;
-				}
-				clear_buffer_unwritten(bh);
-			} while (lblk++, (bh = bh->b_this_page) != head);
-
+			err = mpage_process_page(mpd, page, &lblk, &pblock,
+						 &map_bh);
 			/*
-			 * FIXME: This is going to break if dioread_nolock
-			 * supports blocksize < pagesize as we will try to
-			 * convert potentially unmapped parts of inode.
+			 * If map_bh is true, means page may require further bh
+			 * mapping, or maybe the page was submitted for IO.
+			 * So we return to call further extent mapping.
 			 */
-			mpd->io_submit.io_end->size += PAGE_SIZE;
+			if (err < 0 || map_bh == true)
+				goto out;
 			/* Page fully mapped - let IO run! */
 			err = mpage_submit_page(mpd, page);
-			if (err < 0) {
-				pagevec_release(&pvec);
-				return err;
-			}
+			if (err < 0)
+				goto out;
 		}
 		pagevec_release(&pvec);
 	}
@@ -2454,6 +2496,9 @@
 	mpd->map.m_len = 0;
 	mpd->map.m_flags = 0;
 	return 0;
+out:
+	pagevec_release(&pvec);
+	return err;
 }
 
 static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
@@ -2533,9 +2578,13 @@
 	int err;
 	loff_t disksize;
 	int progress = 0;
+	ext4_io_end_t *io_end = mpd->io_submit.io_end;
+	struct ext4_io_end_vec *io_end_vec;
 
-	mpd->io_submit.io_end->offset =
-				((loff_t)map->m_lblk) << inode->i_blkbits;
+	io_end_vec = ext4_alloc_io_end_vec(io_end);
+	if (IS_ERR(io_end_vec))
+		return PTR_ERR(io_end_vec);
+	io_end_vec->offset = ((loff_t)map->m_lblk) << inode->i_blkbits;
 	do {
 		err = mpage_map_one_extent(handle, mpd);
 		if (err < 0) {
@@ -3642,6 +3691,7 @@
 			    ssize_t size, void *private)
 {
         ext4_io_end_t *io_end = private;
+	struct ext4_io_end_vec *io_end_vec;
 
 	/* if not async direct IO just return */
 	if (!io_end)
@@ -3659,8 +3709,9 @@
 		ext4_clear_io_unwritten_flag(io_end);
 		size = 0;
 	}
-	io_end->offset = offset;
-	io_end->size = size;
+	io_end_vec = ext4_alloc_io_end_vec(io_end);
+	io_end_vec->offset = offset;
+	io_end_vec->size = size;
 	ext4_put_io_end(io_end);
 
 	return 0;

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b67ea979..2429af6 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c

@@ -861,7 +861,7 @@
 			bh[i] = NULL;
 			continue;
 		}
-		bh[i] = ext4_read_block_bitmap_nowait(sb, group);
+		bh[i] = ext4_read_block_bitmap_nowait(sb, group, false);
 		if (IS_ERR(bh[i])) {
 			err = PTR_ERR(bh[i]);
 			bh[i] = NULL;
@@ -2045,6 +2045,7 @@
 				ext4_group_t group, int cr)
 {
 	unsigned free, fragments;
+	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
 	int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
 	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
 
@@ -2061,7 +2062,26 @@
 
 	/* We only do this if the grp has never been initialized */
 	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
-		int ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
+		struct ext4_group_desc *gdp =
+			ext4_get_group_desc(ac->ac_sb, group, NULL);
+		int ret;
+
+		/* cr=0/1 is a very optimistic search to find large
+		 * good chunks almost for free.  If buddy data is not
+		 * ready, then this optimization makes no sense.  But
+		 * we never skip the first block group in a flex_bg,
+		 * since this gets used for metadata block allocation,
+		 * and we want to make sure we locate metadata blocks
+		 * in the first block group in the flex_bg if
+		 * possible.
+		 */
+		if (cr < 2 &&
+		    (!sbi->s_log_groups_per_flex ||
+		     ((group & ((1 << sbi->s_log_groups_per_flex) - 1)) != 0)) &&
+		    !(ext4_has_group_desc_csum(ac->ac_sb) &&
+		      (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))))
+			return 0;
+		ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
 		if (ret)
 			return ret;
 	}
@@ -2105,12 +2125,91 @@
 	return 0;
 }
 
+/*
+ * Start prefetching @nr block bitmaps starting at @group, wrapping to
+ * group 0 after the last group.
+ * If @cnt is non-NULL, it is incremented for every bitmap buffer that is
+ * not yet up to date when ext4_read_block_bitmap_nowait() returns it.
+ * Return the next group which needs to be prefetched.
+ */
+ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
+			      unsigned int nr, int *cnt)
+{
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
+	struct buffer_head *bh;
+	struct blk_plug plug;
+
+	blk_start_plug(&plug);
+	while (nr-- > 0) {
+		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group,
+								  NULL);
+		struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+
+		/*
+		 * Prefetch block groups with free blocks; but don't
+		 * bother if it is marked uninitialized on disk, since
+		 * it won't require I/O to read.  Also only try to
+		 * prefetch once, so we avoid getblk() call, which can
+		 * be expensive.  A group whose descriptor could not be
+		 * looked up (gdp == NULL) is skipped.
+		 */
+		if (gdp && !EXT4_MB_GRP_TEST_AND_SET_READ(grp) &&
+		    EXT4_MB_GRP_NEED_INIT(grp) &&
+		    ext4_free_group_clusters(sb, gdp) > 0 &&
+		    !(ext4_has_group_desc_csum(sb) &&
+		      (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) {
+			bh = ext4_read_block_bitmap_nowait(sb, group, true);
+			if (bh && !IS_ERR(bh)) {
+				if (!buffer_uptodate(bh) && cnt)
+					(*cnt)++;
+				brelse(bh);
+			}
+		}
+		if (++group >= ngroups)
+			group = 0;
+	}
+	blk_finish_plug(&plug);
+	return group;
+}
+
+/*
+ * Prefetching reads the block bitmap into the buffer cache; make sure
+ * the buddy bitmap in the page cache is initialized too, for the @nr
+ * groups preceding @group (wrapping at group 0).  ext4_mb_init_group()
+ * will block if the I/O is not yet completed, or indeed if
+ * ext4_mb_prefetch() never started the I/O.
+ *
+ * TODO: We should actually kick off the buddy bitmap setup in a work
+ * queue when the buffer I/O is completed, so that we don't block
+ * waiting for the block allocation bitmap read to finish when
+ * ext4_mb_prefetch_fini is called from ext4_mb_regular_allocator().
+ */
+void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
+			   unsigned int nr)
+{
+	while (nr-- > 0) {
+		struct ext4_group_desc *gdp;
+		struct ext4_group_info *grp;
+
+		if (!group)
+			group = ext4_get_groups_count(sb);
+		group--;
+		/* look both up only now, so they describe the same group */
+		gdp = ext4_get_group_desc(sb, group, NULL);
+		grp = ext4_get_group_info(sb, group);
+
+		if (gdp && EXT4_MB_GRP_NEED_INIT(grp) &&
+		    ext4_free_group_clusters(sb, gdp) > 0 &&
+		    !(ext4_has_group_desc_csum(sb) &&
+		      (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) {
+			if (ext4_mb_init_group(sb, group, GFP_NOFS))
+				break;
+		}
+	}
+}
+
 static noinline_for_stack int
 ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 {
-	ext4_group_t ngroups, group, i;
+	ext4_group_t prefetch_grp = 0, ngroups, group, i;
 	int cr;
 	int err = 0, first_err = 0;
+	unsigned int nr = 0, prefetch_ios = 0;
 	struct ext4_sb_info *sbi;
 	struct super_block *sb;
 	struct ext4_buddy e4b;
@@ -2178,6 +2277,7 @@
 		 * from the goal value specified
 		 */
 		group = ac->ac_g_ex.fe_group;
+		prefetch_grp = group;
 
 		for (i = 0; i < ngroups; group++, i++) {
 			int ret = 0;
@@ -2189,6 +2289,29 @@
 			if (group >= ngroups)
 				group = 0;
 
+			/*
+			 * Batch reads of the block allocation bitmaps
+			 * to get multiple READs in flight; limit
+			 * prefetching at cr=0/1, otherwise mballoc can
+			 * spend a lot of time loading imperfect groups
+			 */
+			if ((prefetch_grp == group) &&
+			    (cr > 1 ||
+			     prefetch_ios < sbi->s_mb_prefetch_limit)) {
+				unsigned int curr_ios = prefetch_ios;
+
+				nr = sbi->s_mb_prefetch;
+				if (ext4_has_feature_flex_bg(sb)) {
+					nr = (group / sbi->s_mb_prefetch) *
+						sbi->s_mb_prefetch;
+					nr = nr + sbi->s_mb_prefetch - group;
+				}
+				prefetch_grp = ext4_mb_prefetch(sb, group,
+							nr, &prefetch_ios);
+				if (prefetch_ios == curr_ios)
+					nr = 0;
+			}
+
 			/* This now checks without needing the buddy page */
 			ret = ext4_mb_good_group(ac, group, cr);
 			if (ret <= 0) {
@@ -2261,6 +2384,10 @@
 out:
 	if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
 		err = first_err;
+
+	if (nr)
+		ext4_mb_prefetch_fini(sb, prefetch_grp, nr);
+
 	return err;
 }
 
@@ -2520,6 +2647,26 @@
 			goto err_freebuddy;
 	}
 
+	if (ext4_has_feature_flex_bg(sb)) {
+		/* a single flex group is supposed to be read by a single IO */
+		sbi->s_mb_prefetch = 1 << sbi->s_es->s_log_groups_per_flex;
+		sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */
+	} else {
+		sbi->s_mb_prefetch = 32;
+	}
+	if (sbi->s_mb_prefetch > ext4_get_groups_count(sb))
+		sbi->s_mb_prefetch = ext4_get_groups_count(sb);
+	/* how many real IOs to prefetch within a single allocation at cr=0;
+	 * given cr=0 is a CPU-related optimization we shouldn't try to
+	 * load too many groups, at some point we should start to use what
+	 * we've got in memory.
+	 * with an average random access time of 5ms, it'd take a second to get
+	 * 200 groups (* N with flex_bg), so let's make this limit 4
+	 */
+	sbi->s_mb_prefetch_limit = sbi->s_mb_prefetch * 4;
+	if (sbi->s_mb_prefetch_limit > ext4_get_groups_count(sb))
+		sbi->s_mb_prefetch_limit = ext4_get_groups_count(sb);
+
 	return 0;
 
 err_freebuddy:

diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 2cc9f21..47a57fc 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c

@@ -31,18 +31,56 @@
 #include "acl.h"
 
 static struct kmem_cache *io_end_cachep;
+static struct kmem_cache *io_end_vec_cachep;
 
 int __init ext4_init_pageio(void)
 {
 	io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
 	if (io_end_cachep == NULL)
 		return -ENOMEM;
+
+	io_end_vec_cachep = KMEM_CACHE(ext4_io_end_vec, 0);
+	if (io_end_vec_cachep == NULL) {
+		kmem_cache_destroy(io_end_cachep);
+		return -ENOMEM;
+	}
 	return 0;
 }
 
 void ext4_exit_pageio(void)
 {
 	kmem_cache_destroy(io_end_cachep);
+	kmem_cache_destroy(io_end_vec_cachep);
+}
+
+/*
+ * Allocate a zeroed ext4_io_end_vec and append it to @io_end->list_vec.
+ * Returns the new entry, or ERR_PTR(-ENOMEM) if the allocation fails.
+ */
+struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end)
+{
+	struct ext4_io_end_vec *io_end_vec;
+
+	io_end_vec = kmem_cache_zalloc(io_end_vec_cachep, GFP_NOFS);
+	if (!io_end_vec)
+		return ERR_PTR(-ENOMEM);
+	INIT_LIST_HEAD(&io_end_vec->list);
+	list_add_tail(&io_end_vec->list, &io_end->list_vec);
+	return io_end_vec;
+}
+
+/* Unlink and free every ext4_io_end_vec still on @io_end->list_vec. */
+static void ext4_free_io_end_vec(ext4_io_end_t *io_end)
+{
+	struct ext4_io_end_vec *io_end_vec, *tmp;
+
+	if (list_empty(&io_end->list_vec))
+		return;
+	/* _safe variant: each entry is deleted while the list is walked */
+	list_for_each_entry_safe(io_end_vec, tmp, &io_end->list_vec, list) {
+		list_del(&io_end_vec->list);
+		kmem_cache_free(io_end_vec_cachep, io_end_vec);
+	}
+}
+
+/*
+ * Return the last entry of @io_end->list_vec, i.e. the one most recently
+ * appended by ext4_alloc_io_end_vec().  The list must not be empty.
+ */
+struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end)
+{
+	BUG_ON(list_empty(&io_end->list_vec));
+	return list_last_entry(&io_end->list_vec, struct ext4_io_end_vec, list);
 }
 
 /*
@@ -125,6 +163,7 @@
 		ext4_finish_bio(bio);
 		bio_put(bio);
 	}
+	ext4_free_io_end_vec(io_end);
 	kmem_cache_free(io_end_cachep, io_end);
 }
 
@@ -136,29 +175,26 @@
  * cannot get to ext4_ext_truncate() before all IOs overlapping that range are
  * completed (happens from ext4_free_ioend()).
  */
-static int ext4_end_io(ext4_io_end_t *io)
+static int ext4_end_io_end(ext4_io_end_t *io_end)
 {
-	struct inode *inode = io->inode;
-	loff_t offset = io->offset;
-	ssize_t size = io->size;
-	handle_t *handle = io->handle;
+	struct inode *inode = io_end->inode;
+	handle_t *handle = io_end->handle;
 	int ret = 0;
 
-	ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
+	ext4_debug("ext4_end_io_nolock: io_end 0x%p from inode %lu,list->next 0x%p,"
 		   "list->prev 0x%p\n",
-		   io, inode->i_ino, io->list.next, io->list.prev);
+		   io_end, inode->i_ino, io_end->list.next, io_end->list.prev);
 
-	io->handle = NULL;	/* Following call will use up the handle */
-	ret = ext4_convert_unwritten_extents(handle, inode, offset, size);
+	io_end->handle = NULL;	/* Following call will use up the handle */
+	ret = ext4_convert_unwritten_io_end_vec(handle, io_end);
 	if (ret < 0 && !ext4_forced_shutdown(EXT4_SB(inode->i_sb))) {
 		ext4_msg(inode->i_sb, KERN_EMERG,
 			 "failed to convert unwritten extents to written "
 			 "extents -- potential data loss!  "
-			 "(inode %lu, offset %llu, size %zd, error %d)",
-			 inode->i_ino, offset, size, ret);
+			 "(inode %lu, error %d)", inode->i_ino, ret);
 	}
-	ext4_clear_io_unwritten_flag(io);
-	ext4_release_io_end(io);
+	ext4_clear_io_unwritten_flag(io_end);
+	ext4_release_io_end(io_end);
 	return ret;
 }
 
@@ -166,21 +202,21 @@
 {
 #ifdef	EXT4FS_DEBUG
 	struct list_head *cur, *before, *after;
-	ext4_io_end_t *io, *io0, *io1;
+	ext4_io_end_t *io_end, *io_end0, *io_end1;
 
 	if (list_empty(head))
 		return;
 
 	ext4_debug("Dump inode %lu completed io list\n", inode->i_ino);
-	list_for_each_entry(io, head, list) {
-		cur = &io->list;
+	list_for_each_entry(io_end, head, list) {
+		cur = &io_end->list;
 		before = cur->prev;
-		io0 = container_of(before, ext4_io_end_t, list);
+		io_end0 = container_of(before, ext4_io_end_t, list);
 		after = cur->next;
-		io1 = container_of(after, ext4_io_end_t, list);
+		io_end1 = container_of(after, ext4_io_end_t, list);
 
 		ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
-			    io, inode->i_ino, io0, io1);
+			    io_end, inode->i_ino, io_end0, io_end1);
 	}
 #endif
 }
@@ -207,7 +243,7 @@
 static int ext4_do_flush_completed_IO(struct inode *inode,
 				      struct list_head *head)
 {
-	ext4_io_end_t *io;
+	ext4_io_end_t *io_end;
 	struct list_head unwritten;
 	unsigned long flags;
 	struct ext4_inode_info *ei = EXT4_I(inode);
@@ -219,11 +255,11 @@
 	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
 
 	while (!list_empty(&unwritten)) {
-		io = list_entry(unwritten.next, ext4_io_end_t, list);
-		BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN));
-		list_del_init(&io->list);
+		io_end = list_entry(unwritten.next, ext4_io_end_t, list);
+		BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
+		list_del_init(&io_end->list);
 
-		err = ext4_end_io(io);
+		err = ext4_end_io_end(io_end);
 		if (unlikely(!ret && err))
 			ret = err;
 	}
@@ -242,19 +278,22 @@
 
 ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
 {
-	ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags);
-	if (io) {
-		io->inode = inode;
-		INIT_LIST_HEAD(&io->list);
-		atomic_set(&io->count, 1);
+	ext4_io_end_t *io_end = kmem_cache_zalloc(io_end_cachep, flags);
+
+	if (io_end) {
+		io_end->inode = inode;
+		INIT_LIST_HEAD(&io_end->list);
+		INIT_LIST_HEAD(&io_end->list_vec);
+		atomic_set(&io_end->count, 1);
 	}
-	return io;
+	return io_end;
 }
 
 void ext4_put_io_end_defer(ext4_io_end_t *io_end)
 {
 	if (atomic_dec_and_test(&io_end->count)) {
-		if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
+		if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) ||
+				list_empty(&io_end->list_vec)) {
 			ext4_release_io_end(io_end);
 			return;
 		}
@@ -268,9 +307,8 @@
 
 	if (atomic_dec_and_test(&io_end->count)) {
 		if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
-			err = ext4_convert_unwritten_extents(io_end->handle,
-						io_end->inode, io_end->offset,
-						io_end->size);
+			err = ext4_convert_unwritten_io_end_vec(io_end->handle,
+								io_end);
 			io_end->handle = NULL;
 			ext4_clear_io_unwritten_flag(io_end);
 		}
@@ -307,10 +345,8 @@
 		struct inode *inode = io_end->inode;
 
 		ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
-			     "(offset %llu size %ld starting block %llu)",
+			     "starting block %llu)",
 			     bio->bi_status, inode->i_ino,
-			     (unsigned long long) io_end->offset,
-			     (long) io_end->size,
 			     (unsigned long long)
 			     bi_sector >> (inode->i_blkbits - 9));
 		mapping_set_error(inode->i_mapping,

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1211ae2..9a57019 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c

@@ -1475,6 +1475,7 @@
 	Opt_dioread_nolock, Opt_dioread_lock,
 	Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
 	Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
+	Opt_prefetch_block_bitmaps,
 };
 
 static const match_table_t tokens = {
@@ -1561,6 +1562,7 @@
 	{Opt_test_dummy_encryption, "test_dummy_encryption"},
 	{Opt_nombcache, "nombcache"},
 	{Opt_nombcache, "no_mbcache"},	/* for backward compatibility */
+	{Opt_prefetch_block_bitmaps, "prefetch_block_bitmaps"},
 	{Opt_removed, "check=none"},	/* mount option from ext2/3 */
 	{Opt_removed, "nocheck"},	/* mount option from ext2/3 */
 	{Opt_removed, "reservation"},	/* mount option from ext2/3 */
@@ -1771,6 +1773,8 @@
 	{Opt_max_dir_size_kb, 0, MOPT_GTE0},
 	{Opt_test_dummy_encryption, 0, MOPT_GTE0},
 	{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
+	{Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS,
+	 MOPT_SET},
 	{Opt_err, 0, 0}
 };
 
@@ -2084,7 +2088,7 @@
 			 unsigned int *journal_ioprio,
 			 int is_remount)
 {
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_sb_info __maybe_unused *sbi = EXT4_SB(sb);
 	char *p, __maybe_unused *usr_qf_name, __maybe_unused *grp_qf_name;
 	substring_t args[MAX_OPT_ARGS];
 	int token;
@@ -2138,16 +2142,6 @@
 		}
 	}
 #endif
-	if (test_opt(sb, DIOREAD_NOLOCK)) {
-		int blocksize =
-			BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
-
-		if (blocksize < PAGE_SIZE) {
-			ext4_msg(sb, KERN_ERR, "can't mount with "
-				 "dioread_nolock if block size != PAGE_SIZE");
-			return 0;
-		}
-	}
 	return 1;
 }
 
@@ -3069,15 +3063,34 @@
 static int ext4_run_li_request(struct ext4_li_request *elr)
 {
 	struct ext4_group_desc *gdp = NULL;
-	ext4_group_t group, ngroups;
-	struct super_block *sb;
+	struct super_block *sb = elr->lr_super;
+	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+	ext4_group_t group = elr->lr_next_group;
 	unsigned long timeout = 0;
+	unsigned int prefetch_ios = 0;
 	int ret = 0;
 
-	sb = elr->lr_super;
-	ngroups = EXT4_SB(sb)->s_groups_count;
+	if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
+		elr->lr_next_group = ext4_mb_prefetch(sb, group,
+				EXT4_SB(sb)->s_mb_prefetch, &prefetch_ios);
+		if (prefetch_ios)
+			ext4_mb_prefetch_fini(sb, elr->lr_next_group,
+					      prefetch_ios);
+		trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group,
+					    prefetch_ios);
+		if (group >= elr->lr_next_group) {
+			ret = 1;
+			if (elr->lr_first_not_zeroed != ngroups &&
+			    !sb_rdonly(sb) && test_opt(sb, INIT_INODE_TABLE)) {
+				elr->lr_next_group = elr->lr_first_not_zeroed;
+				elr->lr_mode = EXT4_LI_MODE_ITABLE;
+				ret = 0;
+			}
+		}
+		return ret;
+	}
 
-	for (group = elr->lr_next_group; group < ngroups; group++) {
+	for (; group < ngroups; group++) {
 		gdp = ext4_get_group_desc(sb, group, NULL);
 		if (!gdp) {
 			ret = 1;
@@ -3095,9 +3108,10 @@
 		timeout = jiffies;
 		ret = ext4_init_inode_table(sb, group,
 					    elr->lr_timeout ? 0 : 1);
+		trace_ext4_lazy_itable_init(sb, group);
 		if (elr->lr_timeout == 0) {
 			timeout = (jiffies - timeout) *
-				  elr->lr_sbi->s_li_wait_mult;
+				EXT4_SB(elr->lr_super)->s_li_wait_mult;
 			elr->lr_timeout = timeout;
 		}
 		elr->lr_next_sched = jiffies + elr->lr_timeout;
@@ -3112,15 +3126,11 @@
  */
 static void ext4_remove_li_request(struct ext4_li_request *elr)
 {
-	struct ext4_sb_info *sbi;
-
 	if (!elr)
 		return;
 
-	sbi = elr->lr_sbi;
-
 	list_del(&elr->lr_request);
-	sbi->s_li_request = NULL;
+	EXT4_SB(elr->lr_super)->s_li_request = NULL;
 	kfree(elr);
 }
 
@@ -3329,7 +3339,6 @@
 static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
 					    ext4_group_t start)
 {
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_li_request *elr;
 
 	elr = kzalloc(sizeof(*elr), GFP_KERNEL);
@@ -3337,8 +3346,13 @@
 		return NULL;
 
 	elr->lr_super = sb;
-	elr->lr_sbi = sbi;
-	elr->lr_next_group = start;
+	elr->lr_first_not_zeroed = start;
+	if (test_opt(sb, PREFETCH_BLOCK_BITMAPS))
+		elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
+	else {
+		elr->lr_mode = EXT4_LI_MODE_ITABLE;
+		elr->lr_next_group = start;
+	}
 
 	/*
 	 * Randomize first schedule time of the request to
@@ -3356,6 +3370,7 @@
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_li_request *elr = NULL;
 	ext4_group_t ngroups = sbi->s_groups_count;
+	enum ext4_li_mode lr_mode = EXT4_LI_MODE_ITABLE;
 	int ret = 0;
 
 	mutex_lock(&ext4_li_mtx);
@@ -3368,8 +3383,10 @@
 		goto out;
 	}
 
-	if (first_not_zeroed == ngroups || sb_rdonly(sb) ||
-	    !test_opt(sb, INIT_INODE_TABLE))
+	if (test_opt(sb, PREFETCH_BLOCK_BITMAPS)) {
+		lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
+	} else if (first_not_zeroed == ngroups || sb_rdonly(sb) ||
+		   !test_opt(sb, INIT_INODE_TABLE))
 		goto out;
 
 	elr = ext4_li_request_new(sb, first_not_zeroed);

diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 9394360..254435e 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c

@@ -200,6 +200,8 @@
 EXT4_ATTR(first_error_time, 0444, first_error_time);
 EXT4_ATTR(last_error_time, 0444, last_error_time);
 EXT4_ATTR(journal_task, 0444, journal_task);
+EXT4_RW_ATTR_SBI_UI(mb_prefetch, s_mb_prefetch);
+EXT4_RW_ATTR_SBI_UI(mb_prefetch_limit, s_mb_prefetch_limit);
 
 static unsigned int old_bump_val = 128;
 EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val);
@@ -231,6 +233,8 @@
 	ATTR_LIST(first_error_time),
 	ATTR_LIST(last_error_time),
 	ATTR_LIST(journal_task),
+	ATTR_LIST(mb_prefetch),
+	ATTR_LIST(mb_prefetch_limit),
 	NULL,
 };
 ATTRIBUTE_GROUPS(ext4);

diff --git a/fs/file_table.c b/fs/file_table.c
index 30d55c9..b448f13 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c

@@ -278,6 +278,7 @@
 	}
 	if (file->f_op->release)
 		file->f_op->release(inode, file);
+	security_file_pre_free(file);
 	if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
 		     !(mode & FMODE_PATH))) {
 		cdev_put(inode->i_cdev);

diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index cb5629bd..9f650c7 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig

@@ -103,3 +103,11 @@
 config PROC_PID_ARCH_STATUS
 	def_bool n
 	depends on PROC_FS
+
+config PROC_SELF_MEM_READONLY
+	bool "Force /proc/<pid>/mem paths to be read-only"
+	default y
+	help
+	  When enabled, /proc/<pid>/mem (including /proc/self/mem) is made
+	  read-only and writes to it fail with EACCES.  Write access allows
+	  bypassing memory map permissions (such as modifying read-only code).

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 5a187e9..020839e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c

@@ -148,6 +148,12 @@
 		NULL, &proc_pid_attr_operations,	\
 		{ .lsm = LSM })
 
+#ifdef CONFIG_PROC_SELF_MEM_READONLY
+# define PROC_PID_MEM_MODE S_IRUSR
+#else
+# define PROC_PID_MEM_MODE S_IRUSR|S_IWUSR
+#endif
+
 /*
  * Count the number of hardlinks for the pid_entry table, excluding the .
  * and .. links.
@@ -886,7 +892,11 @@
 static ssize_t mem_write(struct file *file, const char __user *buf,
 			 size_t count, loff_t *ppos)
 {
+#ifdef CONFIG_PROC_SELF_MEM_READONLY
+	return -EACCES;
+#else
 	return mem_rw(file, (char __user*)buf, count, ppos, 1);
+#endif
 }
 
 loff_t mem_lseek(struct file *file, loff_t offset, int orig)
@@ -3048,7 +3058,7 @@
 #ifdef CONFIG_NUMA
 	REG("numa_maps",  S_IRUGO, proc_pid_numa_maps_operations),
 #endif
-	REG("mem",        S_IRUSR|S_IWUSR, proc_mem_operations),
+	REG("mem",        PROC_PID_MEM_MODE, proc_mem_operations),
 	LNK("cwd",        proc_cwd_link),
 	LNK("root",       proc_root_link),
 	LNK("exe",        proc_exe_link),
@@ -3449,7 +3459,7 @@
 #ifdef CONFIG_NUMA
 	REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
 #endif
-	REG("mem",       S_IRUSR|S_IWUSR, proc_mem_operations),
+	REG("mem",       PROC_PID_MEM_MODE, proc_mem_operations),
 	LNK("cwd",       proc_cwd_link),
 	LNK("root",      proc_root_link),
 	LNK("exe",       proc_exe_link),

diff --git a/include/linux/audit.h b/include/linux/audit.h
index aee3dc9..d887925 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h

@@ -94,6 +94,17 @@
 struct audit_ntp_data {};
 #endif
 
+struct audit_task_info {
+	kuid_t			loginuid;
+	unsigned int		sessionid;
+	u64			contid;
+#ifdef CONFIG_AUDITSYSCALL
+	struct audit_context	*ctx;
+#endif
+};
+
+extern struct audit_task_info init_struct_audit;
+
 extern int is_audit_feature_set(int which);
 
 extern int __init audit_register_class(int class, unsigned *list);
@@ -130,6 +141,9 @@
 #ifdef CONFIG_AUDIT
 /* These are defined in audit.c */
 				/* Public API */
+extern int  audit_alloc(struct task_struct *task);
+extern void audit_free(struct task_struct *task);
+extern void __init audit_task_init(void);
 extern __printf(4, 5)
 void audit_log(struct audit_context *ctx, gfp_t gfp_mask, int type,
 	       const char *fmt, ...);
@@ -172,12 +186,25 @@
 
 static inline kuid_t audit_get_loginuid(struct task_struct *tsk)
 {
-	return tsk->loginuid;
+	if (!tsk->audit)
+		return INVALID_UID;
+	return tsk->audit->loginuid;
 }
 
 static inline unsigned int audit_get_sessionid(struct task_struct *tsk)
 {
-	return tsk->sessionid;
+	if (!tsk->audit)
+		return AUDIT_SID_UNSET;
+	return tsk->audit->sessionid;
+}
+
+extern int audit_set_contid(struct task_struct *tsk, u64 contid);
+
+static inline u64 audit_get_contid(struct task_struct *tsk)
+{
+	if (!tsk->audit)
+		return AUDIT_CID_UNSET;
+	return tsk->audit->contid;
 }
 
 extern u32 audit_enabled;
@@ -185,6 +212,14 @@
 extern int audit_signal_info(int sig, struct task_struct *t);
 
 #else /* CONFIG_AUDIT */
+static inline int audit_alloc(struct task_struct *task)
+{
+	return 0;
+}
+static inline void audit_free(struct task_struct *task)
+{ }
+static inline void __init audit_task_init(void)
+{ }
 static inline __printf(4, 5)
 void audit_log(struct audit_context *ctx, gfp_t gfp_mask, int type,
 	       const char *fmt, ...)
@@ -236,6 +271,11 @@
 	return AUDIT_SID_UNSET;
 }
 
+static inline u64 audit_get_contid(struct task_struct *tsk)
+{
+	return AUDIT_CID_UNSET;
+}
+
 #define audit_enabled AUDIT_OFF
 
 static inline int audit_signal_info(int sig, struct task_struct *t)
@@ -260,8 +300,6 @@
 
 /* These are defined in auditsc.c */
 				/* Public API */
-extern int  audit_alloc(struct task_struct *task);
-extern void __audit_free(struct task_struct *task);
 extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1,
 				  unsigned long a2, unsigned long a3);
 extern void __audit_syscall_exit(int ret_success, long ret_value);
@@ -281,12 +319,14 @@
 
 static inline void audit_set_context(struct task_struct *task, struct audit_context *ctx)
 {
-	task->audit_context = ctx;
+	task->audit->ctx = ctx;
 }
 
 static inline struct audit_context *audit_context(void)
 {
-	return current->audit_context;
+	if (!current->audit)
+		return NULL;
+	return current->audit->ctx;
 }
 
 static inline bool audit_dummy_context(void)
@@ -294,11 +334,7 @@
 	void *p = audit_context();
 	return !p || *(int *)p;
 }
-static inline void audit_free(struct task_struct *task)
-{
-	if (unlikely(task->audit_context))
-		__audit_free(task);
-}
+
 static inline void audit_syscall_entry(int major, unsigned long a0,
 				       unsigned long a1, unsigned long a2,
 				       unsigned long a3)
@@ -516,12 +552,6 @@
 extern int audit_n_rules;
 extern int audit_signals;
 #else /* CONFIG_AUDITSYSCALL */
-static inline int audit_alloc(struct task_struct *task)
-{
-	return 0;
-}
-static inline void audit_free(struct task_struct *task)
-{ }
 static inline void audit_syscall_entry(int major, unsigned long a0,
 				       unsigned long a1, unsigned long a2,
 				       unsigned long a3)
@@ -654,6 +684,16 @@
 	return uid_valid(audit_get_loginuid(tsk));
 }
 
+static inline bool audit_contid_valid(u64 contid)
+{
+	return contid != AUDIT_CID_UNSET;
+}
+
+static inline bool audit_contid_set(struct task_struct *tsk)
+{
+	return audit_contid_valid(audit_get_contid(tsk));
+}
+
 static inline void audit_log_string(struct audit_buffer *ab, const char *buf)
 {
 	audit_log_n_string(ab, buf, strlen(buf));

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 169fd25..359fdae 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h

@@ -378,6 +378,7 @@
 }
 
 #define cgroup_bpf_enabled (0)
+#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) ({ 0; })
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 66590ae..fb03e75 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h

@@ -205,6 +205,7 @@
 	ARG_CONST_SIZE_OR_ZERO,	/* number of bytes accessed from memory or 0 */
 
 	ARG_PTR_TO_CTX,		/* pointer to context */
+	ARG_PTR_TO_CTX_OR_NULL,	/* pointer to context or NULL */
 	ARG_ANYTHING,		/* any (initialized) argument is ok */
 	ARG_PTR_TO_SPIN_LOCK,	/* pointer to bpf_spin_lock */
 	ARG_PTR_TO_SOCK_COMMON,	/* pointer to sock_common */
@@ -1068,6 +1069,7 @@
 extern const struct bpf_func_proto bpf_strtol_proto;
 extern const struct bpf_func_proto bpf_strtoul_proto;
 extern const struct bpf_func_proto bpf_tcp_sock_proto;
+extern const struct bpf_func_proto bpf_jiffies64_proto;
 
 /* Shared helpers among cBPF and eBPF. */
 void bpf_user_rnd_init_once(void);

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index a376324..1299786 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h

@@ -489,6 +489,10 @@
  * @file_free_security:
  *	Deallocate and free any security structures stored in file->f_security.
  *	@file contains the file structure being modified.
+ * @file_pre_free_security:
+ *	Perform any logging or LSM state updates for a file being freed,
+ *	using fields of the file before they have been cleared.
+ *	@file contains the file structure being freed.
  * @file_ioctl:
  *	@file contains the file structure.
  *	@cmd contains the operation to perform.
@@ -565,6 +569,10 @@
  *	@clone_flags contains the flags indicating what should be shared.
  *	Handle allocation of task-related resources.
  *	Returns a zero on success, negative values on failure.
+ * @task_post_alloc:
+ *	@task task being allocated.
+ *	Handle allocation of task-related resources after all task fields are
+ *	filled in.
  * @task_free:
  *	@task task about to be freed.
  *	Handle release of task-related resources. (Note that this can be called
@@ -713,6 +721,9 @@
  *	@cred contains the cred of the process where the signal originated, or
  *	NULL if the current task is the originator.
  *	Return 0 if permission is granted.
+ * @task_exit:
+ *	Called early when a task is exiting, before all state is lost.
+ *	@p contains the task_struct for the exiting process.
  * @task_prctl:
  *	Check permission before performing a process control operation on the
  *	current process.
@@ -1600,6 +1611,7 @@
 	int (*file_permission)(struct file *file, int mask);
 	int (*file_alloc_security)(struct file *file);
 	void (*file_free_security)(struct file *file);
+	void (*file_pre_free_security)(struct file *file);
 	int (*file_ioctl)(struct file *file, unsigned int cmd,
 				unsigned long arg);
 	int (*mmap_addr)(unsigned long addr);
@@ -1617,6 +1629,7 @@
 	int (*file_open)(struct file *file);
 
 	int (*task_alloc)(struct task_struct *task, unsigned long clone_flags);
+	void (*task_post_alloc)(struct task_struct *task); // Do not upstream.
 	void (*task_free)(struct task_struct *task);
 	int (*cred_alloc_blank)(struct cred *cred, gfp_t gfp);
 	void (*cred_free)(struct cred *cred);
@@ -1649,6 +1662,7 @@
 	int (*task_movememory)(struct task_struct *p);
 	int (*task_kill)(struct task_struct *p, struct kernel_siginfo *info,
 				int sig, const struct cred *cred);
+	void (*task_exit)(struct task_struct *p);
 	int (*task_prctl)(int option, unsigned long arg2, unsigned long arg3,
 				unsigned long arg4, unsigned long arg5);
 	void (*task_to_inode)(struct task_struct *p, struct inode *inode);
@@ -1818,6 +1832,14 @@
 	void (*bpf_prog_free_security)(struct bpf_prog_aux *aux);
 #endif /* CONFIG_BPF_SYSCALL */
 	int (*locked_down)(enum lockdown_reason what);
+#ifdef CONFIG_PERF_EVENTS
+	int (*perf_event_open)(struct perf_event_attr *attr, int type);
+	int (*perf_event_alloc)(struct perf_event *event);
+	void (*perf_event_free)(struct perf_event *event);
+	int (*perf_event_read)(struct perf_event *event);
+	int (*perf_event_write)(struct perf_event *event);
+
+#endif
 };
 
 struct security_hook_heads {
@@ -1906,6 +1928,7 @@
 	struct hlist_head file_permission;
 	struct hlist_head file_alloc_security;
 	struct hlist_head file_free_security;
+	struct hlist_head file_pre_free_security;
 	struct hlist_head file_ioctl;
 	struct hlist_head mmap_addr;
 	struct hlist_head mmap_file;
@@ -1917,6 +1940,7 @@
 	struct hlist_head file_receive;
 	struct hlist_head file_open;
 	struct hlist_head task_alloc;
+	struct hlist_head task_post_alloc;
 	struct hlist_head task_free;
 	struct hlist_head cred_alloc_blank;
 	struct hlist_head cred_free;
@@ -1943,6 +1967,7 @@
 	struct hlist_head task_getscheduler;
 	struct hlist_head task_movememory;
 	struct hlist_head task_kill;
+	struct hlist_head task_exit;
 	struct hlist_head task_prctl;
 	struct hlist_head task_to_inode;
 	struct hlist_head ipc_permission;
@@ -2060,6 +2085,13 @@
 	struct hlist_head bpf_prog_free_security;
 #endif /* CONFIG_BPF_SYSCALL */
 	struct hlist_head locked_down;
+#ifdef CONFIG_PERF_EVENTS
+	struct hlist_head perf_event_open;
+	struct hlist_head perf_event_alloc;
+	struct hlist_head perf_event_free;
+	struct hlist_head perf_event_read;
+	struct hlist_head perf_event_write;
+#endif
 } __randomize_layout;
 
 /*

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 68ccc5b..4f77b22 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h

@@ -56,6 +56,7 @@
 #include <linux/perf_regs.h>
 #include <linux/cgroup.h>
 #include <linux/refcount.h>
+#include <linux/security.h>
 #include <asm/local.h>
 
 struct perf_callchain_entry {
@@ -721,6 +722,9 @@
 	struct perf_cgroup		*cgrp; /* cgroup event is attach to */
 #endif
 
+#ifdef CONFIG_SECURITY
+	void *security;
+#endif
 	struct list_head		sb_list;
 #endif /* CONFIG_PERF_EVENTS */
 };
@@ -1241,19 +1245,41 @@
 int perf_event_max_stack_handler(struct ctl_table *table, int write,
 				 void __user *buffer, size_t *lenp, loff_t *ppos);
 
-static inline bool perf_paranoid_tracepoint_raw(void)
+/* Access to perf_event_open(2) syscall. */
+#define PERF_SECURITY_OPEN		0
+
+/* Finer grained perf_event_open(2) access control. */
+#define PERF_SECURITY_CPU		1
+#define PERF_SECURITY_KERNEL		2
+#define PERF_SECURITY_TRACEPOINT	3
+
+static inline int perf_is_paranoid(void)
 {
 	return sysctl_perf_event_paranoid > -1;
 }
 
-static inline bool perf_paranoid_cpu(void)
+static inline int perf_allow_kernel(struct perf_event_attr *attr)
 {
-	return sysctl_perf_event_paranoid > 0;
+	if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	return security_perf_event_open(attr, PERF_SECURITY_KERNEL);
 }
 
-static inline bool perf_paranoid_kernel(void)
+static inline int perf_allow_cpu(struct perf_event_attr *attr)
 {
-	return sysctl_perf_event_paranoid > 1;
+	if (sysctl_perf_event_paranoid > 0 && !capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	return security_perf_event_open(attr, PERF_SECURITY_CPU);
+}
+
+static inline int perf_allow_tracepoint(struct perf_event_attr *attr)
+{
+	if (sysctl_perf_event_paranoid > -1 && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT);
 }
 
 extern void perf_event_init(void);

diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 49538b1..dded498 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h

@@ -45,6 +45,9 @@
 	int hide_pid;
 	int reboot;	/* group exit code if this pidns was rebooted */
 	struct ns_common ns;
+#ifdef CONFIG_SECURITY_CONTAINER_MONITOR
+	u64 cid;  /* Main container identifier, zero if not assigned. */
+#endif
 } __randomize_layout;
 
 extern struct pid_namespace init_pid_ns;

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5710b80..f118049 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h

@@ -33,7 +33,6 @@
 #include <linux/rseq.h>
 
 /* task_struct member predeclarations (sorted alphabetically): */
-struct audit_context;
 struct backing_dev_info;
 struct bio_list;
 struct blk_plug;
@@ -925,11 +924,7 @@
 	struct callback_head		*task_works;
 
 #ifdef CONFIG_AUDIT
-#ifdef CONFIG_AUDITSYSCALL
-	struct audit_context		*audit_context;
-#endif
-	kuid_t				loginuid;
-	unsigned int			sessionid;
+	struct audit_task_info		*audit;
 #endif
 	struct seccomp			seccomp;
 

diff --git a/include/linux/security.h b/include/linux/security.h
index df90399..0576e82 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h

@@ -359,6 +359,7 @@
 int security_file_permission(struct file *file, int mask);
 int security_file_alloc(struct file *file);
 void security_file_free(struct file *file);
+void security_file_pre_free(struct file *file);
 int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 int security_mmap_file(struct file *file, unsigned long prot,
 			unsigned long flags);
@@ -373,6 +374,7 @@
 int security_file_receive(struct file *file);
 int security_file_open(struct file *file);
 int security_task_alloc(struct task_struct *task, unsigned long clone_flags);
+void security_task_post_alloc(struct task_struct *task);
 void security_task_free(struct task_struct *task);
 int security_cred_alloc_blank(struct cred *cred, gfp_t gfp);
 void security_cred_free(struct cred *cred);
@@ -404,6 +406,7 @@
 int security_task_movememory(struct task_struct *p);
 int security_task_kill(struct task_struct *p, struct kernel_siginfo *info,
 			int sig, const struct cred *cred);
+void security_task_exit(struct task_struct *p);
 int security_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 			unsigned long arg4, unsigned long arg5);
 void security_task_to_inode(struct task_struct *p, struct inode *inode);
@@ -899,6 +902,9 @@
 static inline void security_file_free(struct file *file)
 { }
 
+static inline void security_file_pre_free(struct file *file)
+{ }
+
 static inline int security_file_ioctl(struct file *file, unsigned int cmd,
 				      unsigned long arg)
 {
@@ -962,6 +968,9 @@
 	return 0;
 }
 
+static inline void security_task_post_alloc(struct task_struct *task)
+{ }
+
 static inline void security_task_free(struct task_struct *task)
 { }
 
@@ -1097,6 +1106,9 @@
 	return 0;
 }
 
+static inline void security_task_exit(struct task_struct *p)
+{ }
+
 static inline int security_task_prctl(int option, unsigned long arg2,
 				      unsigned long arg3,
 				      unsigned long arg4,
@@ -1897,5 +1909,41 @@
 #endif /* CONFIG_SECURITY */
 #endif /* CONFIG_BPF_SYSCALL */
 
-#endif /* ! __LINUX_SECURITY_H */
+#ifdef CONFIG_PERF_EVENTS
+struct perf_event_attr;
 
+#ifdef CONFIG_SECURITY
+extern int security_perf_event_open(struct perf_event_attr *attr, int type);
+extern int security_perf_event_alloc(struct perf_event *event);
+extern void security_perf_event_free(struct perf_event *event);
+extern int security_perf_event_read(struct perf_event *event);
+extern int security_perf_event_write(struct perf_event *event);
+#else
+static inline int security_perf_event_open(struct perf_event_attr *attr,
+					   int type)
+{
+	return 0;
+}
+
+static inline int security_perf_event_alloc(struct perf_event *event)
+{
+	return 0;
+}
+
+static inline void security_perf_event_free(struct perf_event *event)
+{
+}
+
+static inline int security_perf_event_read(struct perf_event *event)
+{
+	return 0;
+}
+
+static inline int security_perf_event_write(struct perf_event *event)
+{
+	return 0;
+}
+#endif /* CONFIG_SECURITY */
+#endif /* CONFIG_PERF_EVENTS */
+
+#endif /* ! __LINUX_SECURITY_H */

diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
index 4295de3..7e78e7d 100644
--- a/include/net/cls_cgroup.h
+++ b/include/net/cls_cgroup.h

@@ -45,9 +45,14 @@
 	sock_cgroup_set_classid(skcd, classid);
 }
 
+static inline u32 __task_get_classid(struct task_struct *task)
+{
+	return task_cls_state(task)->classid;
+}
+
 static inline u32 task_get_classid(const struct sk_buff *skb)
 {
-	u32 classid = task_cls_state(current)->classid;
+	u32 classid = __task_get_classid(current);
 
 	/* Due to the nature of the classifier it is required to ignore all
 	 * packets originating from softirq context as accessing `current'

diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index fe96bf2..81b9659 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h

@@ -85,9 +85,8 @@
 					      int iif, int sdif,
 					      bool *refcounted)
 {
-	struct sock *sk = skb_steal_sock(skb);
+	struct sock *sk = skb_steal_sock(skb, refcounted);
 
-	*refcounted = true;
 	if (sk)
 		return sk;
 

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 59802eb..9256097 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h

@@ -385,10 +385,9 @@
 					     const int sdif,
 					     bool *refcounted)
 {
-	struct sock *sk = skb_steal_sock(skb);
+	struct sock *sk = skb_steal_sock(skb, refcounted);
 	const struct iphdr *iph = ip_hdr(skb);
 
-	*refcounted = true;
 	if (sk)
 		return sk;
 

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 167e390..c8bddcf0 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h

@@ -165,6 +165,9 @@
 #ifdef CONFIG_XFRM
 	struct netns_xfrm	xfrm;
 #endif
+
+	atomic64_t		net_cookie; /* written once */
+
 #if IS_ENABLED(CONFIG_IP_VS)
 	struct netns_ipvs	*ipvs;
 #endif
@@ -229,6 +232,8 @@
 struct net *get_net_ns_by_pid(pid_t pid);
 struct net *get_net_ns_by_fd(int fd);
 
+u64 net_gen_cookie(struct net *net);
+
 #ifdef CONFIG_SYSCTL
 void ipx_register_sysctl(void);
 void ipx_unregister_sysctl(void);

diff --git a/include/net/sock.h b/include/net/sock.h
index 079b5f6..fed8706 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h

@@ -1623,6 +1623,7 @@
 void sock_efree(struct sk_buff *skb);
 #ifdef CONFIG_INET
 void sock_edemux(struct sk_buff *skb);
+void sock_pfree(struct sk_buff *skb);
 #else
 #define sock_edemux sock_efree
 #endif
@@ -2501,16 +2502,14 @@
 	write_pnet(&sk->sk_net, net);
 }
 
-static inline struct sock *skb_steal_sock(struct sk_buff *skb)
+static inline bool
+skb_sk_is_prefetched(struct sk_buff *skb)
 {
-	if (skb->sk) {
-		struct sock *sk = skb->sk;
-
-		skb->destructor = NULL;
-		skb->sk = NULL;
-		return sk;
-	}
-	return NULL;
+#ifdef CONFIG_INET
+	return skb->destructor == sock_pfree;
+#else
+	return false;
+#endif /* CONFIG_INET */
 }
 
 /* This helper checks if a socket is a full socket,
@@ -2521,6 +2520,35 @@
 	return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV);
 }
 
+static inline bool
+sk_is_refcounted(struct sock *sk)
+{
+	/* Only full sockets have sk->sk_flags. */
+	return !sk_fullsock(sk) || !sock_flag(sk, SOCK_RCU_FREE);
+}
+
+/**
+ * skb_steal_sock - steal a socket from an sk_buff
+ * @skb: sk_buff to steal the socket from
+ * @refcounted: is set to true if the socket is reference-counted
+ */
+static inline struct sock *
+skb_steal_sock(struct sk_buff *skb, bool *refcounted)
+{
+	if (skb->sk) {
+		struct sock *sk = skb->sk;
+
+		*refcounted = true;
+		if (skb_sk_is_prefetched(skb))
+			*refcounted = sk_is_refcounted(sk);
+		skb->destructor = NULL;
+		skb->sk = NULL;
+		return sk;
+	}
+	*refcounted = false;
+	return NULL;
+}
+
 /* Checks if this SKB belongs to an HW offloaded socket
  * and whether any SW fallbacks are required based on dev.
  * Check decrypted mark in case skb_orphan() cleared socket.

diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index d68e9e5..038f163 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h

@@ -1292,18 +1292,34 @@
 	TP_ARGS(sb, group)
 );
 
-DEFINE_EVENT(ext4__bitmap_load, ext4_read_block_bitmap_load,
+DEFINE_EVENT(ext4__bitmap_load, ext4_load_inode_bitmap,
 
 	TP_PROTO(struct super_block *sb, unsigned long group),
 
 	TP_ARGS(sb, group)
 );
 
-DEFINE_EVENT(ext4__bitmap_load, ext4_load_inode_bitmap,
+TRACE_EVENT(ext4_read_block_bitmap_load,
+	TP_PROTO(struct super_block *sb, unsigned long group, bool prefetch),
 
-	TP_PROTO(struct super_block *sb, unsigned long group),
+	TP_ARGS(sb, group, prefetch),
 
-	TP_ARGS(sb, group)
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	__u32,	group			)
+		__field(	bool,	prefetch		)
+
+	),
+
+	TP_fast_assign(
+		__entry->dev	= sb->s_dev;
+		__entry->group	= group;
+		__entry->prefetch = prefetch;
+	),
+
+	TP_printk("dev %d,%d group %u prefetch %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->group, __entry->prefetch)
 );
 
 TRACE_EVENT(ext4_direct_IO_enter,
@@ -2703,6 +2719,50 @@
 		  __entry->function, __entry->line)
 );
 
+TRACE_EVENT(ext4_prefetch_bitmaps,
+	    TP_PROTO(struct super_block *sb, ext4_group_t group,
+		     ext4_group_t next, unsigned int prefetch_ios),
+
+	TP_ARGS(sb, group, next, prefetch_ios),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	__u32,	group			)
+		__field(	__u32,	next			)
+		__field(	__u32,	ios			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= sb->s_dev;
+		__entry->group	= group;
+		__entry->next	= next;
+		__entry->ios	= prefetch_ios;
+	),
+
+	TP_printk("dev %d,%d group %u next %u ios %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->group, __entry->next, __entry->ios)
+);
+
+TRACE_EVENT(ext4_lazy_itable_init,
+	    TP_PROTO(struct super_block *sb, ext4_group_t group),
+
+	TP_ARGS(sb, group),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	__u32,	group			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= sb->s_dev;
+		__entry->group	= group;
+	),
+
+	TP_printk("dev %d,%d group %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->group)
+);
+
 #endif /* _TRACE_EXT4_H */
 
 /* This part must be outside protection */

diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index c89c649..5d0ea2a 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h

@@ -71,6 +71,7 @@
 #define AUDIT_TTY_SET		1017	/* Set TTY auditing status */
 #define AUDIT_SET_FEATURE	1018	/* Turn an audit feature on or off */
 #define AUDIT_GET_FEATURE	1019	/* Get which features are enabled */
+#define AUDIT_CONTAINER_OP	1020	/* Define the container id and info */
 
 #define AUDIT_FIRST_USER_MSG	1100	/* Userspace messages mostly uninteresting to kernel */
 #define AUDIT_USER_AVC		1107	/* We filter this differently */
@@ -488,6 +489,7 @@
 
 #define AUDIT_UID_UNSET (unsigned int)-1
 #define AUDIT_SID_UNSET ((unsigned int)-1)
+#define AUDIT_CID_UNSET ((u64)-1)
 
 /* audit_rule_data supports filter rules with both integer and string
  * fields.  It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 63038eb..b27a109 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h

@@ -199,6 +199,16 @@
 	BPF_CGROUP_UDP6_RECVMSG,
 	BPF_CGROUP_GETSOCKOPT,
 	BPF_CGROUP_SETSOCKOPT,
+	BPF_TRACE_RAW_TP,
+	BPF_TRACE_FENTRY,
+	BPF_TRACE_FEXIT,
+	BPF_MODIFY_RETURN,
+	BPF_LSM_MAC,
+	BPF_TRACE_ITER,
+	BPF_CGROUP_INET4_GETPEERNAME,
+	BPF_CGROUP_INET6_GETPEERNAME,
+	BPF_CGROUP_INET4_GETSOCKNAME,
+	BPF_CGROUP_INET6_GETSOCKNAME,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -2750,6 +2760,231 @@
  *		**-EOPNOTSUPP** kernel configuration does not enable SYN cookies
  *
  *		**-EPROTONOSUPPORT** IP packet version is not 4 or 6
+ *
+ * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * 	Description
+ * 		Write raw *data* blob into a special BPF perf event held by
+ * 		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * 		event must have the following attributes: **PERF_SAMPLE_RAW**
+ * 		as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * 		**PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * 		The *flags* are used to indicate the index in *map* for which
+ * 		the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * 		Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * 		to indicate that the index of the current CPU core should be
+ * 		used.
+ *
+ * 		The value to write, of *size*, is passed through eBPF stack and
+ * 		pointed by *data*.
+ *
+ * 		*ctx* is a pointer to in-kernel struct sk_buff.
+ *
+ * 		This helper is similar to **bpf_perf_event_output**\ () but
+ * 		restricted to raw_tracepoint bpf programs.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
+ * 	Description
+ * 		Safely attempt to read *size* bytes from user space address
+ * 		*unsafe_ptr* and store the data in *dst*.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
+ * 	Description
+ * 		Safely attempt to read *size* bytes from kernel space address
+ * 		*unsafe_ptr* and store the data in *dst*.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
+ * 	Description
+ * 		Copy a NUL terminated string from an unsafe user address
+ * 		*unsafe_ptr* to *dst*. The *size* should include the
+ * 		terminating NUL byte. In case the string length is smaller than
+ * 		*size*, the target is not padded with further NUL bytes. If the
+ * 		string length is larger than *size*, just *size*-1 bytes are
+ * 		copied and the last byte is set to NUL.
+ *
+ * 		On success, the length of the copied string is returned. This
+ * 		makes this helper useful in tracing programs for reading
+ * 		strings, and more importantly to get its length at runtime. See
+ * 		the following snippet:
+ *
+ * 		::
+ *
+ * 			SEC("kprobe/sys_open")
+ * 			void bpf_sys_open(struct pt_regs *ctx)
+ * 			{
+ * 			        char buf[PATHLEN]; // PATHLEN is defined to 256
+ * 			        int res = bpf_probe_read_user_str(buf, sizeof(buf),
+ * 				                                  ctx->di);
+ *
+ * 				// Consume buf, for example push it to
+ * 				// userspace via bpf_perf_event_output(); we
+ * 				// can use res (the string length) as event
+ * 				// size, after checking its boundaries.
+ * 			}
+ *
+ * 		In comparison, using **bpf_probe_read_user()** helper here
+ * 		instead to read the string would require to estimate the length
+ * 		at compile time, and would often result in copying more memory
+ * 		than necessary.
+ *
+ * 		Another useful use case is when parsing individual process
+ * 		arguments or individual environment variables navigating
+ * 		*current*\ **->mm->arg_start** and *current*\
+ * 		**->mm->env_start**: using this helper and the return value,
+ * 		one can quickly iterate at the right offset of the memory area.
+ * 	Return
+ * 		On success, the strictly positive length of the string,
+ * 		including the trailing NUL character. On error, a negative
+ * 		value.
+ *
+ * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
+ * 	Description
+ * 		Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
+ * 		to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
+ * 	Return
+ * 		On success, the strictly positive length of the string,	including
+ * 		the trailing NUL character. On error, a negative value.
+ *
+ * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt)
+ *	Description
+ *		Send out a tcp-ack. *tp* is the in-kernel struct tcp_sock.
+ *		*rcv_nxt* is the ack_seq to be sent out.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_send_signal_thread(u32 sig)
+ *	Description
+ *		Send signal *sig* to the thread corresponding to the current task.
+ *	Return
+ *		0 on success or successfully queued.
+ *
+ *		**-EBUSY** if work queue under nmi is full.
+ *
+ *		**-EINVAL** if *sig* is invalid.
+ *
+ *		**-EPERM** if no permission to send the *sig*.
+ *
+ *		**-EAGAIN** if bpf program can try again.
+ *
+ * u64 bpf_jiffies64(void)
+ *	Description
+ *		Obtain the 64bit jiffies
+ *	Return
+ *		The 64 bit jiffies
+ *
+ * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
+ *	Description
+ *		For an eBPF program attached to a perf event, retrieve the
+ *		branch records (struct perf_branch_entry) associated with *ctx*
+ *		and store them in the buffer pointed to by *buf*, up to
+ *		*size* bytes.
+ *	Return
+ *		On success, number of bytes written to *buf*. On error, a
+ *		negative value.
+ *
+ *		The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to
+ *		instead	return the number of bytes required to store all the
+ *		branch entries. If this flag is set, *buf* may be NULL.
+ *
+ *		**-EINVAL** if arguments invalid or **size** not a multiple
+ *		of sizeof(struct perf_branch_entry).
+ *
+ *		**-ENOENT** if architecture does not support branch records.
+ *
+ * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size)
+ *	Description
+ *		Returns 0 on success, values for *pid* and *tgid* as seen from the current
+ *		*namespace* will be returned in *nsdata*.
+ *
+ *		On failure, the returned value is one of the following:
+ *
+ *		**-EINVAL** if dev and inum supplied don't match dev_t and inode number
+ *              with nsfs of current task, or if dev conversion to dev_t lost high bits.
+ *
+ *		**-ENOENT** if pidns does not exist for the current task.
+ *
+ * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ *	Description
+ *		Write raw *data* blob into a special BPF perf event held by
+ *		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ *		event must have the following attributes: **PERF_SAMPLE_RAW**
+ *		as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ *		**PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ *		The *flags* are used to indicate the index in *map* for which
+ *		the value must be put, masked with **BPF_F_INDEX_MASK**.
+ *		Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ *		to indicate that the index of the current CPU core should be
+ *		used.
+ *
+ *		The value to write, of *size*, is passed through eBPF stack and
+ *		pointed by *data*.
+ *
+ *		*ctx* is a pointer to in-kernel struct xdp_buff.
+ *
+ *		This helper is similar to **bpf_perf_event_output**\ () but
+ *		restricted to raw_tracepoint bpf programs.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_get_netns_cookie(void *ctx)
+ * 	Description
+ * 		Retrieve the cookie (generated by the kernel) of the network
+ * 		namespace the input *ctx* is associated with. The network
+ * 		namespace cookie remains stable for its lifetime and provides
+ * 		a global identifier that can be assumed unique. If *ctx* is
+ * 		NULL, then the helper returns the cookie for the initial
+ * 		network namespace. The cookie itself is very similar to that
+ * 		of bpf_get_socket_cookie() helper, but for network namespaces
+ * 		instead of sockets.
+ * 	Return
+ * 		An 8-byte long opaque number.
+ *
+ * u64 bpf_get_current_ancestor_cgroup_id(int ancestor_level)
+ * 	Description
+ * 		Return id of cgroup v2 that is ancestor of the cgroup associated
+ * 		with the current task at the *ancestor_level*. The root cgroup
+ * 		is at *ancestor_level* zero and each step down the hierarchy
+ * 		increments the level. If *ancestor_level* == level of cgroup
+ * 		associated with the current task, then return value will be the
+ * 		same as that of **bpf_get_current_cgroup_id**\ ().
+ *
+ * 		The helper is useful to implement policies based on cgroups
+ * 		that are higher in the hierarchy than the immediate cgroup
+ * 		associated with the current task.
+ *
+ * 		The format of returned id and helper limitations are same as in
+ * 		**bpf_get_current_cgroup_id**\ ().
+ * 	Return
+ * 		The id is returned or 0 in case the id could not be retrieved.
+ *
+ * int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
+ *	Description
+ *		Assign the *sk* to the *skb*. When combined with appropriate
+ *		routing configuration to receive the packet towards the socket,
+ *		will cause *skb* to be delivered to the specified socket.
+ *		Subsequent redirection of *skb* via  **bpf_redirect**\ (),
+ *		**bpf_clone_redirect**\ () or other methods outside of BPF may
+ *		interfere with successful delivery to the socket.
+ *
+ *		This operation is only valid from TC ingress path.
+ *
+ *		The *flags* argument must be zero.
+ *	Return
+ *		0 on success, or a negative errno in case of failure.
+ *
+ *		* **-EINVAL**		Unsupported flags specified.
+ *		* **-ENOENT**		Socket is unavailable for assignment.
+ *		* **-ENETUNREACH**	Socket is unreachable (wrong netns).
+ *		* **-EOPNOTSUPP**	Unsupported operation, for example a
+ *					call from outside of TC ingress.
+ *		* **-ESOCKTNOSUPPORT**	Socket type not supported (reuseport).
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2862,7 +3097,21 @@
 	FN(sk_storage_get),		\
 	FN(sk_storage_delete),		\
 	FN(send_signal),		\
-	FN(tcp_gen_syncookie),
+	FN(tcp_gen_syncookie),		\
+	FN(skb_output),			\
+	FN(probe_read_user),		\
+	FN(probe_read_kernel),		\
+	FN(probe_read_user_str),	\
+	FN(probe_read_kernel_str),	\
+	FN(tcp_send_ack),		\
+	FN(send_signal_thread),		\
+	FN(jiffies64),			\
+	FN(read_branch_records),	\
+	FN(get_ns_current_pid_tgid),	\
+	FN(xdp_output),			\
+	FN(get_netns_cookie),		\
+	FN(get_current_ancestor_cgroup_id),	\
+	FN(sk_assign),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call

diff --git a/init/init_task.c b/init/init_task.c
index 5d8359c..e40f14b 100644
--- a/init/init_task.c
+++ b/init/init_task.c

@@ -123,8 +123,7 @@
 	.thread_group	= LIST_HEAD_INIT(init_task.thread_group),
 	.thread_node	= LIST_HEAD_INIT(init_signals.thread_head),
 #ifdef CONFIG_AUDIT
-	.loginuid	= INVALID_UID,
-	.sessionid	= AUDIT_SID_UNSET,
+	.audit		= &init_struct_audit,
 #endif
 #ifdef CONFIG_PERF_EVENTS
 	.perf_event_mutex = __MUTEX_INITIALIZER(init_task.perf_event_mutex),

diff --git a/init/main.c b/init/main.c
index e6a1fb1..06b939a 100644
--- a/init/main.c
+++ b/init/main.c

@@ -94,6 +94,7 @@
 #include <linux/rodata_test.h>
 #include <linux/jump_label.h>
 #include <linux/mem_encrypt.h>
+#include <linux/audit.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -526,14 +527,16 @@
 {
 	const char *stack;
 
-	if (IS_ENABLED(CONFIG_INIT_STACK_ALL))
-		stack = "all";
+	if (IS_ENABLED(CONFIG_INIT_STACK_ALL_PATTERN))
+		stack = "all(pattern)";
+	else if (IS_ENABLED(CONFIG_INIT_STACK_ALL_ZERO))
+		stack = "all(zero)";
 	else if (IS_ENABLED(CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL))
-		stack = "byref_all";
+		stack = "byref_all(zero)";
 	else if (IS_ENABLED(CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF))
-		stack = "byref";
+		stack = "byref(zero)";
 	else if (IS_ENABLED(CONFIG_GCC_PLUGIN_STRUCTLEAK_USER))
-		stack = "__user";
+		stack = "__user(zero)";
 	else
 		stack = "off";
 
@@ -771,6 +774,7 @@
 	nsfs_init();
 	cpuset_init();
 	cgroup_init();
+	audit_task_init();
 	taskstats_init_early();
 	delayacct_init();
 

diff --git a/kernel/audit.c b/kernel/audit.c
index 05ae208..5dd7c2b 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c

@@ -202,6 +202,75 @@
 	struct sk_buff *skb;
 };
 
+static struct kmem_cache *audit_task_cache;
+
+void __init audit_task_init(void)
+{
+	audit_task_cache = kmem_cache_create("audit_task",
+					     sizeof(struct audit_task_info),
+					     0, SLAB_PANIC, NULL);
+}
+
+/**
+ * audit_alloc - allocate an audit info block for a task
+ * @tsk: task
+ *
+ * Allocate the audit_task_info for @tsk, copying loginuid, sessionid and
+ * contid from current, then call audit_alloc_syscall to allocate a per-task
+ * audit context if necessary.  Called from copy_process; no lock is needed.
+ */
+int audit_alloc(struct task_struct *tsk)
+{
+	int ret = 0;
+	struct audit_task_info *info;
+
+	info = kmem_cache_alloc(audit_task_cache, GFP_KERNEL);
+	if (!info) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	info->loginuid = audit_get_loginuid(current);
+	info->sessionid = audit_get_sessionid(current);
+	info->contid = audit_get_contid(current);
+	tsk->audit = info;
+
+	ret = audit_alloc_syscall(tsk);
+	if (ret) {
+		tsk->audit = NULL;
+		kmem_cache_free(audit_task_cache, info);
+	}
+out:
+	return ret;
+}
+
+struct audit_task_info init_struct_audit = {
+	.loginuid = INVALID_UID,
+	.sessionid = AUDIT_SID_UNSET,
+	.contid = AUDIT_CID_UNSET,
+#ifdef CONFIG_AUDITSYSCALL
+	.ctx = NULL,
+#endif
+};
+
+/**
+ * audit_free - free per-task audit info
+ * @tsk: task whose audit info block to free
+ *
+ * Called from copy_process and do_exit
+ */
+void audit_free(struct task_struct *tsk)
+{
+	struct audit_task_info *info;
+
+	audit_free_syscall(tsk);
+	/* Only detach and free the audit_task_info struct after the syscall
+	 * context is released: audit_log_exit() needs loginuid and sessionid.
+	 */
+	info = tsk->audit;
+	tsk->audit = NULL;
+	kmem_cache_free(audit_task_cache, info);
+}
+
 /**
  * auditd_test_task - Check to see if a given task is an audit daemon
  * @task: the task to check
@@ -2266,8 +2335,8 @@
 			sessionid = (unsigned int)atomic_inc_return(&session_id);
 	}
 
-	current->sessionid = sessionid;
-	current->loginuid = loginuid;
+	current->audit->sessionid = sessionid;
+	current->audit->loginuid = loginuid;
 out:
 	audit_log_set_loginuid(oldloginuid, loginuid, oldsessionid, sessionid, rc);
 	return rc;
@@ -2300,6 +2369,62 @@
 	return audit_signal_info_syscall(t);
 }
 
+/*
+ * audit_set_contid - set a task's audit contid
+ * @task: target task
+ * @contid: contid value
+ *
+ * Returns 0 on success, -EPERM on permission failure, or another negative
+ * errno when one of the checks below rejects the request.
+ * Called (set) from fs/proc/base.c::proc_contid_write().
+ */
+int audit_set_contid(struct task_struct *task, u64 contid)
+{
+	u64 oldcontid;
+	int rc = 0;
+	struct audit_buffer *ab;
+
+	task_lock(task);
+	/* Can't set if the task has no audit info block */
+	if (!task->audit) {
+		task_unlock(task);
+		return -ENOPROTOOPT;
+	}
+	oldcontid = audit_get_contid(task);
+	read_lock(&tasklist_lock);
+	/* Don't allow the audit containerid to be unset */
+	if (!audit_contid_valid(contid))
+		rc = -EINVAL;
+	/* if we don't have caps, reject */
+	else if (!capable(CAP_AUDIT_CONTROL))
+		rc = -EPERM;
+	/* if task has children or is not single-threaded, deny */
+	else if (!list_empty(&task->children))
+		rc = -EBUSY;
+	else if (!(thread_group_leader(task) && thread_group_empty(task)))
+		rc = -EALREADY;
+	/* if contid is already set, deny */
+	else if (audit_contid_set(task))
+		rc = -ECHILD;
+	read_unlock(&tasklist_lock);
+	if (!rc)
+		task->audit->contid = contid;
+	task_unlock(task);
+
+	if (!audit_enabled)
+		return rc;
+
+	ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_CONTAINER_OP);
+	if (!ab)
+		return rc;
+
+	audit_log_format(ab,
+			 "op=set opid=%d contid=%llu old-contid=%llu",
+			 task_tgid_nr(task), contid, oldcontid);
+	audit_log_end(ab);
+	return rc;
+}
+
 /**
  * audit_log_end - end one audit record
  * @ab: the audit_buffer

diff --git a/kernel/audit.h b/kernel/audit.h
index ddc2287..2be1b95 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h

@@ -135,6 +135,7 @@
 	kuid_t		    target_uid;
 	unsigned int	    target_sessionid;
 	u32		    target_sid;
+	u64		    target_cid;
 	char		    target_comm[TASK_COMM_LEN];
 
 	struct audit_tree_refs *trees, *first_trees;
@@ -251,6 +252,8 @@
 extern unsigned int audit_serial(void);
 extern int auditsc_get_stamp(struct audit_context *ctx,
 			      struct timespec64 *t, unsigned int *serial);
+extern int audit_alloc_syscall(struct task_struct *tsk);
+extern void audit_free_syscall(struct task_struct *tsk);
 
 extern void audit_put_watch(struct audit_watch *watch);
 extern void audit_get_watch(struct audit_watch *watch);
@@ -292,6 +295,9 @@
 extern struct list_head *audit_killed_trees(void);
 #else /* CONFIG_AUDITSYSCALL */
 #define auditsc_get_stamp(c, t, s) 0
+#define audit_alloc_syscall(t) 0
+#define audit_free_syscall(t) {}
+
 #define audit_put_watch(w) {}
 #define audit_get_watch(w) {}
 #define audit_to_watch(k, p, l, o) (-EINVAL)

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4effe01..0e2d505 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c

@@ -113,6 +113,7 @@
 	kuid_t			target_uid[AUDIT_AUX_PIDS];
 	unsigned int		target_sessionid[AUDIT_AUX_PIDS];
 	u32			target_sid[AUDIT_AUX_PIDS];
+	u64			target_cid[AUDIT_AUX_PIDS];
 	char 			target_comm[AUDIT_AUX_PIDS][TASK_COMM_LEN];
 	int			pid_count;
 };
@@ -903,23 +904,25 @@
 	return context;
 }
 
-/**
- * audit_alloc - allocate an audit context block for a task
+/*
+ * audit_alloc_syscall - allocate an audit context block for a task
  * @tsk: task
  *
  * Filter on the task information and allocate a per-task audit context
  * if necessary.  Doing so turns on system call auditing for the
- * specified task.  This is called from copy_process, so no lock is
- * needed.
+ * specified task.  This is called from copy_process via audit_alloc, so
+ * no lock is needed.
  */
-int audit_alloc(struct task_struct *tsk)
+int audit_alloc_syscall(struct task_struct *tsk)
 {
 	struct audit_context *context;
 	enum audit_state     state;
 	char *key = NULL;
 
-	if (likely(!audit_ever_enabled))
+	if (likely(!audit_ever_enabled)) {
+		audit_set_context(tsk, NULL);
 		return 0; /* Return if not auditing. */
+	}
 
 	state = audit_filter_task(tsk, &key);
 	if (state == AUDIT_DISABLED) {
@@ -929,7 +932,7 @@
 
 	if (!(context = audit_alloc_context(state))) {
 		kfree(key);
-		audit_log_lost("out of memory in audit_alloc");
+		audit_log_lost("out of memory in audit_alloc_syscall");
 		return -ENOMEM;
 	}
 	context->filterkey = key;
@@ -1574,14 +1577,15 @@
 }
 
 /**
- * __audit_free - free a per-task audit context
+ * audit_free_syscall - free per-task audit context info
  * @tsk: task whose audit context block to free
  *
- * Called from copy_process and do_exit
+ * Called from audit_free
  */
-void __audit_free(struct task_struct *tsk)
+void audit_free_syscall(struct task_struct *tsk)
 {
-	struct audit_context *context = tsk->audit_context;
+	struct audit_task_info *info = tsk->audit;
+	struct audit_context *context = info->ctx;
 
 	if (!context)
 		return;
@@ -1604,7 +1608,6 @@
 		if (context->current_state == AUDIT_RECORD_CONTEXT)
 			audit_log_exit();
 	}
-
 	audit_set_context(tsk, NULL);
 	audit_free_context(context);
 }
@@ -2373,6 +2376,7 @@
 	context->target_uid = task_uid(t);
 	context->target_sessionid = audit_get_sessionid(t);
 	security_task_getsecid(t, &context->target_sid);
+	context->target_cid = audit_get_contid(t);
 	memcpy(context->target_comm, t->comm, TASK_COMM_LEN);
 }
 
@@ -2400,6 +2404,7 @@
 		ctx->target_uid = t_uid;
 		ctx->target_sessionid = audit_get_sessionid(t);
 		security_task_getsecid(t, &ctx->target_sid);
+		ctx->target_cid = audit_get_contid(t);
 		memcpy(ctx->target_comm, t->comm, TASK_COMM_LEN);
 		return 0;
 	}
@@ -2421,6 +2426,7 @@
 	axp->target_uid[axp->pid_count] = t_uid;
 	axp->target_sessionid[axp->pid_count] = audit_get_sessionid(t);
 	security_task_getsecid(t, &axp->target_sid[axp->pid_count]);
+	axp->target_cid[axp->pid_count] = audit_get_contid(t);
 	memcpy(axp->target_comm[axp->pid_count], t->comm, TASK_COMM_LEN);
 	axp->pid_count++;
 

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index d9a3d99..61d439b 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c

@@ -2072,6 +2072,7 @@
 const struct bpf_func_proto bpf_map_peek_elem_proto __weak;
 const struct bpf_func_proto bpf_spin_lock_proto __weak;
 const struct bpf_func_proto bpf_spin_unlock_proto __weak;
+const struct bpf_func_proto bpf_jiffies64_proto __weak;
 
 const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
 const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a77d281..91fe4e0 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c

@@ -11,6 +11,7 @@
 #include <linux/uidgid.h>
 #include <linux/filter.h>
 #include <linux/ctype.h>
+#include <linux/jiffies.h>
 
 #include "../../lib/kstrtox.h"
 
@@ -312,6 +313,17 @@
 	preempt_enable();
 }
 
+BPF_CALL_0(bpf_jiffies64)
+{
+	return get_jiffies_64();
+}
+
+const struct bpf_func_proto bpf_jiffies64_proto = {
+	.func		= bpf_jiffies64,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+};
+
 #ifdef CONFIG_CGROUPS
 BPF_CALL_0(bpf_get_current_cgroup_id)
 {

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index bf03d04..b6695e6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c

@@ -1598,6 +1598,10 @@
 		case BPF_CGROUP_INET6_BIND:
 		case BPF_CGROUP_INET4_CONNECT:
 		case BPF_CGROUP_INET6_CONNECT:
+		case BPF_CGROUP_INET4_GETPEERNAME:
+		case BPF_CGROUP_INET6_GETPEERNAME:
+		case BPF_CGROUP_INET4_GETSOCKNAME:
+		case BPF_CGROUP_INET6_GETSOCKNAME:
 		case BPF_CGROUP_UDP4_SENDMSG:
 		case BPF_CGROUP_UDP6_SENDMSG:
 		case BPF_CGROUP_UDP4_RECVMSG:
@@ -1925,6 +1929,10 @@
 	case BPF_CGROUP_INET6_BIND:
 	case BPF_CGROUP_INET4_CONNECT:
 	case BPF_CGROUP_INET6_CONNECT:
+	case BPF_CGROUP_INET4_GETPEERNAME:
+	case BPF_CGROUP_INET6_GETPEERNAME:
+	case BPF_CGROUP_INET4_GETSOCKNAME:
+	case BPF_CGROUP_INET6_GETSOCKNAME:
 	case BPF_CGROUP_UDP4_SENDMSG:
 	case BPF_CGROUP_UDP6_SENDMSG:
 	case BPF_CGROUP_UDP4_RECVMSG:
@@ -2073,6 +2081,10 @@
 	case BPF_CGROUP_INET6_POST_BIND:
 	case BPF_CGROUP_INET4_CONNECT:
 	case BPF_CGROUP_INET6_CONNECT:
+	case BPF_CGROUP_INET4_GETPEERNAME:
+	case BPF_CGROUP_INET6_GETPEERNAME:
+	case BPF_CGROUP_INET4_GETSOCKNAME:
+	case BPF_CGROUP_INET6_GETSOCKNAME:
 	case BPF_CGROUP_UDP4_SENDMSG:
 	case BPF_CGROUP_UDP6_SENDMSG:
 	case BPF_CGROUP_UDP4_RECVMSG:

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9c5fa5c..eca61a1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c

@@ -3341,13 +3341,17 @@
 		expected_type = CONST_PTR_TO_MAP;
 		if (type != expected_type)
 			goto err_type;
-	} else if (arg_type == ARG_PTR_TO_CTX) {
+	} else if (arg_type == ARG_PTR_TO_CTX ||
+		   arg_type == ARG_PTR_TO_CTX_OR_NULL) {
 		expected_type = PTR_TO_CTX;
-		if (type != expected_type)
-			goto err_type;
-		err = check_ctx_reg(env, reg, regno);
-		if (err < 0)
-			return err;
+		if (!(register_is_null(reg) &&
+		      arg_type == ARG_PTR_TO_CTX_OR_NULL)) {
+			if (type != expected_type)
+				goto err_type;
+			err = check_ctx_reg(env, reg, regno);
+			if (err < 0)
+				return err;
+		}
 	} else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
 		expected_type = PTR_TO_SOCK_COMMON;
 		/* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
@@ -6414,7 +6418,11 @@
 	switch (env->prog->type) {
 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
 		if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
-		    env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG)
+		    env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
+		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
+		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
+		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
+		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
 			range = tnum_range(1, 1);
 		break;
 	case BPF_PROG_TYPE_CGROUP_SKB:
@@ -9443,6 +9451,30 @@
 			goto patch_call_imm;
 		}
 
+		if (prog->jit_requested && BITS_PER_LONG == 64 &&
+		    insn->imm == BPF_FUNC_jiffies64) {
+			struct bpf_insn ld_jiffies_addr[2] = {
+				BPF_LD_IMM64(BPF_REG_0,
+					     (unsigned long)&jiffies),
+			};
+
+			insn_buf[0] = ld_jiffies_addr[0];
+			insn_buf[1] = ld_jiffies_addr[1];
+			insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
+						  BPF_REG_0, 0);
+			cnt = 3;
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
+						       cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			continue;
+		}
+
 patch_call_imm:
 		fn = env->ops->get_func_proto(insn->imm, env->prog);
 		/* all functions that have prototype and verifier allowed

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1993a74..0cae26b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c

@@ -4243,8 +4243,9 @@
 
 	if (!task) {
 		/* Must be root to operate on a CPU event: */
-		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
-			return ERR_PTR(-EACCES);
+		err = perf_allow_cpu(&event->attr);
+		if (err)
+			return ERR_PTR(err);
 
 		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
 		ctx = &cpuctx->ctx;
@@ -4555,6 +4556,8 @@
 
 	unaccount_event(event);
 
+	security_perf_event_free(event);
+
 	if (event->rb) {
 		/*
 		 * Can happen when we close an event with re-directed output.
@@ -5008,6 +5011,10 @@
 	struct perf_event_context *ctx;
 	int ret;
 
+	ret = security_perf_event_read(event);
+	if (ret)
+		return ret;
+
 	ctx = perf_event_ctx_lock(event);
 	ret = __perf_read(event, buf, count);
 	perf_event_ctx_unlock(event, ctx);
@@ -5272,6 +5279,11 @@
 	struct perf_event_context *ctx;
 	long ret;
 
+	/* Treat ioctl like writes as it is likely a mutating operation. */
+	ret = security_perf_event_write(event);
+	if (ret)
+		return ret;
+
 	ctx = perf_event_ctx_lock(event);
 	ret = _perf_ioctl(event, cmd, arg);
 	perf_event_ctx_unlock(event, ctx);
@@ -5736,6 +5748,10 @@
 	if (!(vma->vm_flags & VM_SHARED))
 		return -EINVAL;
 
+	ret = security_perf_event_read(event);
+	if (ret)
+		return ret;
+
 	vma_size = vma->vm_end - vma->vm_start;
 
 	if (vma->vm_pgoff == 0) {
@@ -5869,7 +5885,7 @@
 	lock_limit >>= PAGE_SHIFT;
 	locked = atomic64_read(&vma->vm_mm->pinned_vm) + extra;
 
-	if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
+	if ((locked > lock_limit) && perf_is_paranoid() &&
 		!capable(CAP_IPC_LOCK)) {
 		ret = -EPERM;
 		goto unlock;
@@ -10620,11 +10636,20 @@
 		}
 	}
 
+	err = security_perf_event_alloc(event);
+	if (err)
+		goto err_callchain_buffer;
+
 	/* symmetric to unaccount_event() in _free_event() */
 	account_event(event);
 
 	return event;
 
+err_callchain_buffer:
+	if (!event->parent) {
+		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+			put_callchain_buffers();
+	}
 err_addr_filters:
 	kfree(event->addr_filter_ranges);
 
@@ -10713,9 +10738,11 @@
 			attr->branch_sample_type = mask;
 		}
 		/* privileged levels capture (kernel, hv): check permissions */
-		if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM)
-		    && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
-			return -EACCES;
+		if (mask & PERF_SAMPLE_BRANCH_PERM_PLM) {
+			ret = perf_allow_kernel(attr);
+			if (ret)
+				return ret;
+		}
 	}
 
 	if (attr->sample_type & PERF_SAMPLE_REGS_USER) {
@@ -10928,13 +10955,19 @@
 	if (flags & ~PERF_FLAG_ALL)
 		return -EINVAL;
 
+	/* Do we allow access to perf_event_open(2) ? */
+	err = security_perf_event_open(&attr, PERF_SECURITY_OPEN);
+	if (err)
+		return err;
+
 	err = perf_copy_attr(attr_uptr, &attr);
 	if (err)
 		return err;
 
 	if (!attr.exclude_kernel) {
-		if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
-			return -EACCES;
+		err = perf_allow_kernel(&attr);
+		if (err)
+			return err;
 	}
 
 	if (attr.namespaces) {
@@ -10951,9 +10984,11 @@
 	}
 
 	/* Only privileged users can get physical addresses */
-	if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR) &&
-	    perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
-		return -EACCES;
+	if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR)) {
+		err = perf_allow_kernel(&attr);
+		if (err)
+			return err;
+	}
 
 	/* REGS_INTR can leak data, lockdown must prevent this */
 	if (attr.sample_type & PERF_SAMPLE_REGS_INTR) {

diff --git a/kernel/exit.c b/kernel/exit.c
index ece6477..db8f9ee 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c

@@ -63,6 +63,7 @@
 #include <linux/random.h>
 #include <linux/rcuwait.h>
 #include <linux/compat.h>
+#include <linux/security.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -786,6 +787,8 @@
 #endif
 		if (tsk->mm)
 			setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);
+
+		security_task_exit(tsk);
 	}
 	acct_collect(code, group_dead);
 	if (group_dead)

diff --git a/kernel/fork.c b/kernel/fork.c
index cf2cebd..8278273 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c

@@ -1950,7 +1950,6 @@
 	posix_cputimers_init(&p->posix_cputimers);
 
 	p->io_context = NULL;
-	audit_set_context(p, NULL);
 	cgroup_fork(p);
 #ifdef CONFIG_NUMA
 	p->mempolicy = mpol_dup(p->mempolicy);
@@ -2243,6 +2242,7 @@
 	uprobe_copy_process(p, clone_flags);
 
 	copy_oom_score_adj(clone_flags, p);
+	security_task_post_alloc(p);
 
 	return p;
 

diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index a9dfa04..643e0b1 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c

@@ -8,6 +8,7 @@
 
 #include <linux/module.h>
 #include <linux/kprobes.h>
+#include <linux/security.h>
 #include "trace.h"
 #include "trace_probe.h"
 
@@ -26,8 +27,10 @@
 static int perf_trace_event_perm(struct trace_event_call *tp_event,
 				 struct perf_event *p_event)
 {
+	int ret;
+
 	if (tp_event->perf_perm) {
-		int ret = tp_event->perf_perm(tp_event, p_event);
+		ret = tp_event->perf_perm(tp_event, p_event);
 		if (ret)
 			return ret;
 	}
@@ -46,8 +49,9 @@
 
 	/* The ftrace function trace is allowed only for root. */
 	if (ftrace_event_is_function(tp_event)) {
-		if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
-			return -EPERM;
+		ret = perf_allow_tracepoint(&p_event->attr);
+		if (ret)
+			return ret;
 
 		if (!is_sampling_event(p_event))
 			return 0;
@@ -82,8 +86,9 @@
 	 * ...otherwise raw tracepoint data can be a severe data leak,
 	 * only allow root to have these.
 	 */
-	if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
-		return -EPERM;
+	ret = perf_allow_tracepoint(&p_event->attr);
+	if (ret)
+		return ret;
 
 	return 0;
 }

diff --git a/net/core/filter.c b/net/core/filter.c
index 0e161a6..73cb5a38 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c

@@ -2644,6 +2644,19 @@
 	.arg4_type	= ARG_ANYTHING,
 };
 
+#ifdef CONFIG_CGROUP_NET_CLASSID
+BPF_CALL_0(bpf_get_cgroup_classid_curr)
+{
+	return __task_get_classid(current);
+}
+
+static const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto = {
+	.func		= bpf_get_cgroup_classid_curr,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+};
+#endif
+
 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
 {
 	return task_get_classid(skb);
@@ -4179,6 +4192,18 @@
 	.arg1_type	= ARG_PTR_TO_CTX,
 };
 
+BPF_CALL_1(bpf_get_socket_cookie_sock, struct sock *, ctx)
+{
+	return sock_gen_cookie(ctx);
+}
+
+static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = {
+	.func		= bpf_get_socket_cookie_sock,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+};
+
 BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
 {
 	return sock_gen_cookie(ctx->sk);
@@ -4191,6 +4216,39 @@
 	.arg1_type	= ARG_PTR_TO_CTX,
 };
 
+static u64 __bpf_get_netns_cookie(struct sock *sk)
+{
+#ifdef CONFIG_NET_NS
+	return net_gen_cookie(sk ? sk->sk_net.net : &init_net);
+#else
+	return 0;
+#endif
+}
+
+BPF_CALL_1(bpf_get_netns_cookie_sock, struct sock *, ctx)
+{
+	return __bpf_get_netns_cookie(ctx);
+}
+
+static const struct bpf_func_proto bpf_get_netns_cookie_sock_proto = {
+	.func		= bpf_get_netns_cookie_sock,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX_OR_NULL,
+};
+
+BPF_CALL_1(bpf_get_netns_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
+{
+	return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL);
+}
+
+static const struct bpf_func_proto bpf_get_netns_cookie_sock_addr_proto = {
+	.func		= bpf_get_netns_cookie_sock_addr,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX_OR_NULL,
+};
+
 BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
 {
 	struct sock *sk = sk_to_full_sk(skb->sk);
@@ -4209,8 +4267,8 @@
 	.arg1_type      = ARG_PTR_TO_CTX,
 };
 
-BPF_CALL_5(bpf_sockopt_event_output, struct bpf_sock_ops_kern *, bpf_sock,
-	   struct bpf_map *, map, u64, flags, void *, data, u64, size)
+BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map, u64, flags,
+	   void *, data, u64, size)
 {
 	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
 		return -EINVAL;
@@ -4218,8 +4276,8 @@
 	return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
 }
 
-static const struct bpf_func_proto bpf_sockopt_event_output_proto =  {
-	.func		= bpf_sockopt_event_output,
+static const struct bpf_func_proto bpf_event_output_data_proto =  {
+	.func		= bpf_event_output_data,
 	.gpl_only       = true,
 	.ret_type       = RET_INTEGER,
 	.arg1_type      = ARG_PTR_TO_CTX,
@@ -5414,8 +5472,7 @@
 
 BPF_CALL_1(bpf_sk_release, struct sock *, sk)
 {
-	/* Only full sockets have sk->sk_flags. */
-	if (!sk_fullsock(sk) || !sock_flag(sk, SOCK_RCU_FREE))
+	if (sk_is_refcounted(sk))
 		sock_gen_put(sk);
 	return 0;
 }
@@ -5935,6 +5992,36 @@
 	.arg5_type	= ARG_CONST_SIZE,
 };
 
+BPF_CALL_3(bpf_sk_assign, struct sk_buff *, skb, struct sock *, sk, u64, flags)
+{
+	if (flags != 0)
+		return -EINVAL;
+	if (!skb_at_tc_ingress(skb))
+		return -EOPNOTSUPP;
+	if (unlikely(dev_net(skb->dev) != sock_net(sk)))
+		return -ENETUNREACH;
+	if (unlikely(sk_fullsock(sk) && sk->sk_reuseport))
+		return -ESOCKTNOSUPPORT;
+	if (sk_is_refcounted(sk) &&
+	    unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
+		return -ENOENT;
+
+	skb_orphan(skb);
+	skb->sk = sk;
+	skb->destructor = sock_pfree;
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_sk_assign_proto = {
+	.func		= bpf_sk_assign,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_PTR_TO_SOCK_COMMON,
+	.arg3_type	= ARG_ANYTHING,
+};
+
 #endif /* CONFIG_INET */
 
 bool bpf_helper_changes_pkt_data(void *func)
@@ -6011,6 +6098,8 @@
 		return &bpf_spin_unlock_proto;
 	case BPF_FUNC_trace_printk:
 		return bpf_get_trace_printk_proto();
+	case BPF_FUNC_jiffies64:
+		return &bpf_jiffies64_proto;
 	default:
 		return NULL;
 	}
@@ -6027,6 +6116,20 @@
 		return &bpf_get_current_uid_gid_proto;
 	case BPF_FUNC_get_local_storage:
 		return &bpf_get_local_storage_proto;
+	case BPF_FUNC_get_socket_cookie:
+		return &bpf_get_socket_cookie_sock_proto;
+	case BPF_FUNC_get_netns_cookie:
+		return &bpf_get_netns_cookie_sock_proto;
+	case BPF_FUNC_perf_event_output:
+		return &bpf_event_output_data_proto;
+	case BPF_FUNC_get_current_pid_tgid:
+		return &bpf_get_current_pid_tgid_proto;
+	case BPF_FUNC_get_current_comm:
+		return &bpf_get_current_comm_proto;
+#ifdef CONFIG_CGROUP_NET_CLASSID
+	case BPF_FUNC_get_cgroup_classid:
+		return &bpf_get_cgroup_classid_curr_proto;
+#endif
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -6051,8 +6154,20 @@
 		}
 	case BPF_FUNC_get_socket_cookie:
 		return &bpf_get_socket_cookie_sock_addr_proto;
+	case BPF_FUNC_get_netns_cookie:
+		return &bpf_get_netns_cookie_sock_addr_proto;
 	case BPF_FUNC_get_local_storage:
 		return &bpf_get_local_storage_proto;
+	case BPF_FUNC_perf_event_output:
+		return &bpf_event_output_data_proto;
+	case BPF_FUNC_get_current_pid_tgid:
+		return &bpf_get_current_pid_tgid_proto;
+	case BPF_FUNC_get_current_comm:
+		return &bpf_get_current_comm_proto;
+#ifdef CONFIG_CGROUP_NET_CLASSID
+	case BPF_FUNC_get_cgroup_classid:
+		return &bpf_get_cgroup_classid_curr_proto;
+#endif
 #ifdef CONFIG_INET
 	case BPF_FUNC_sk_lookup_tcp:
 		return &bpf_sock_addr_sk_lookup_tcp_proto;
@@ -6226,6 +6341,8 @@
 		return &bpf_skb_ecn_set_ce_proto;
 	case BPF_FUNC_tcp_gen_syncookie:
 		return &bpf_tcp_gen_syncookie_proto;
+	case BPF_FUNC_sk_assign:
+		return &bpf_sk_assign_proto;
 #endif
 	default:
 		return bpf_base_func_proto(func_id);
@@ -6295,7 +6412,7 @@
 	case BPF_FUNC_get_local_storage:
 		return &bpf_get_local_storage_proto;
 	case BPF_FUNC_perf_event_output:
-		return &bpf_sockopt_event_output_proto;
+		return &bpf_event_output_data_proto;
 	case BPF_FUNC_sk_storage_get:
 		return &bpf_sk_storage_get_proto;
 	case BPF_FUNC_sk_storage_delete:
@@ -6330,6 +6447,16 @@
 		return &bpf_msg_push_data_proto;
 	case BPF_FUNC_msg_pop_data:
 		return &bpf_msg_pop_data_proto;
+	case BPF_FUNC_perf_event_output:
+		return &bpf_event_output_data_proto;
+	case BPF_FUNC_get_current_uid_gid:
+		return &bpf_get_current_uid_gid_proto;
+	case BPF_FUNC_get_current_pid_tgid:
+		return &bpf_get_current_pid_tgid_proto;
+#ifdef CONFIG_CGROUP_NET_CLASSID
+	case BPF_FUNC_get_cgroup_classid:
+		return &bpf_get_cgroup_classid_curr_proto;
+#endif
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -6936,6 +7063,8 @@
 		switch (prog->expected_attach_type) {
 		case BPF_CGROUP_INET4_BIND:
 		case BPF_CGROUP_INET4_CONNECT:
+		case BPF_CGROUP_INET4_GETPEERNAME:
+		case BPF_CGROUP_INET4_GETSOCKNAME:
 		case BPF_CGROUP_UDP4_SENDMSG:
 		case BPF_CGROUP_UDP4_RECVMSG:
 			break;
@@ -6947,6 +7076,8 @@
 		switch (prog->expected_attach_type) {
 		case BPF_CGROUP_INET6_BIND:
 		case BPF_CGROUP_INET6_CONNECT:
+		case BPF_CGROUP_INET6_GETPEERNAME:
+		case BPF_CGROUP_INET6_GETSOCKNAME:
 		case BPF_CGROUP_UDP6_SENDMSG:
 		case BPF_CGROUP_UDP6_RECVMSG:
 			break;

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 9bf1551..9df3119 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c

@@ -69,6 +69,20 @@
 
 static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
 
+static atomic64_t cookie_gen;
+
+u64 net_gen_cookie(struct net *net)
+{
+	while (1) {
+		u64 res = atomic64_read(&net->net_cookie);
+
+		if (res)
+			return res;
+		res = atomic64_inc_return(&cookie_gen);
+		atomic64_cmpxchg(&net->net_cookie, 0, res);
+	}
+}
+
 static struct net_generic *net_alloc_generic(void)
 {
 	struct net_generic *ng;
@@ -1108,6 +1122,7 @@
 		panic("Could not allocate generic netns");
 
 	rcu_assign_pointer(init_net.gen, ng);
+	net_gen_cookie(&init_net);
 
 	down_write(&pernet_ops_rwsem);
 	if (setup_net(&init_net, &init_user_ns))

diff --git a/net/core/sock.c b/net/core/sock.c
index 57b7a10..bf4f9d5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c

@@ -2074,6 +2074,18 @@
 }
 EXPORT_SYMBOL(sock_efree);
 
+/* Buffer destructor for prefetch/receive path where reference count may
+ * not be held, e.g. for listen sockets.
+ */
+#ifdef CONFIG_INET
+void sock_pfree(struct sk_buff *skb)
+{
+	if (sk_is_refcounted(skb->sk))
+		sock_gen_put(skb->sk);
+}
+EXPORT_SYMBOL(sock_pfree);
+#endif /* CONFIG_INET */
+
 kuid_t sock_i_uid(struct sock *sk)
 {
 	kuid_t uid;

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 70f92aa..c530271 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c

@@ -753,12 +753,11 @@
 }
 EXPORT_SYMBOL(inet_accept);
 
-
 /*
  *	This does both peername and sockname.
  */
 int inet_getname(struct socket *sock, struct sockaddr *uaddr,
-			int peer)
+		 int peer)
 {
 	struct sock *sk		= sock->sk;
 	struct inet_sock *inet	= inet_sk(sk);
@@ -779,6 +778,11 @@
 		sin->sin_port = inet->inet_sport;
 		sin->sin_addr.s_addr = addr;
 	}
+	if (cgroup_bpf_enabled)
+		BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
+					    peer ? BPF_CGROUP_INET4_GETPEERNAME :
+						   BPF_CGROUP_INET4_GETSOCKNAME,
+					    NULL);
 	memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
 	return sizeof(*sin);
 }

diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index c59a78a..72cbfc7 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c

@@ -494,7 +494,8 @@
 	IPCB(skb)->iif = skb->skb_iif;
 
 	/* Must drop socket now because of tproxy. */
-	skb_orphan(skb);
+	if (!skb_sk_is_prefetched(skb))
+		skb_orphan(skb);
 
 	return skb;
 

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index de04d99..77b5550 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c

@@ -2289,6 +2289,7 @@
 	struct rtable *rt = skb_rtable(skb);
 	__be32 saddr, daddr;
 	struct net *net = dev_net(skb->dev);
+	bool refcounted;
 
 	/*
 	 *  Validate the packet.
@@ -2314,7 +2315,7 @@
 	if (udp4_csum_init(skb, uh, proto))
 		goto csum_error;
 
-	sk = skb_steal_sock(skb);
+	sk = skb_steal_sock(skb, &refcounted);
 	if (sk) {
 		struct dst_entry *dst = skb_dst(skb);
 		int ret;
@@ -2323,7 +2324,8 @@
 			udp_sk_rx_dst_set(sk, dst);
 
 		ret = udp_unicast_rcv_skb(sk, skb, uh);
-		sock_put(sk);
+		if (refcounted)
+			sock_put(sk);
 		return ret;
 	}
 

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 14ac1d9..224dddc 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c

@@ -501,9 +501,8 @@
 /*
  *	This does both peername and sockname.
  */
-
 int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
-		 int peer)
+		  int peer)
 {
 	struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
 	struct sock *sk = sock->sk;
@@ -528,9 +527,13 @@
 			sin->sin6_addr = np->saddr;
 		else
 			sin->sin6_addr = sk->sk_v6_rcv_saddr;
-
 		sin->sin6_port = inet->inet_sport;
 	}
+	if (cgroup_bpf_enabled)
+		BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
+					    peer ? BPF_CGROUP_INET6_GETPEERNAME :
+						   BPF_CGROUP_INET6_GETSOCKNAME,
+					    NULL);
 	sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
 						 sk->sk_bound_dev_if);
 	return sizeof(*sin);

diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 7e5df23..7aa900f 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c

@@ -253,7 +253,8 @@
 	rcu_read_unlock();
 
 	/* Must drop socket now because of tproxy. */
-	skb_orphan(skb);
+	if (!skb_sk_is_prefetched(skb))
+		skb_orphan(skb);
 
 	return skb;
 err:

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 5b8266f..894d910 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c

@@ -847,6 +847,7 @@
 	struct net *net = dev_net(skb->dev);
 	struct udphdr *uh;
 	struct sock *sk;
+	bool refcounted;
 	u32 ulen = 0;
 
 	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
@@ -883,7 +884,7 @@
 		goto csum_error;
 
 	/* Check if the socket is already available, e.g. due to early demux */
-	sk = skb_steal_sock(skb);
+	sk = skb_steal_sock(skb, &refcounted);
 	if (sk) {
 		struct dst_entry *dst = skb_dst(skb);
 		int ret;
@@ -892,12 +893,14 @@
 			udp6_sk_rx_dst_set(sk, dst);
 
 		if (!uh->check && !udp_sk(sk)->no_check6_rx) {
-			sock_put(sk);
+			if (refcounted)
+				sock_put(sk);
 			goto report_csum_error;
 		}
 
 		ret = udp6_unicast_rcv_skb(sk, skb, uh);
-		sock_put(sk);
+		if (refcounted)
+			sock_put(sk);
 		return ret;
 	}
 

diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index b7d83d0..0b015ce 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c

@@ -12,6 +12,7 @@
 #include <linux/bpf.h>
 
 #include <net/netlink.h>
+#include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
 
@@ -53,6 +54,8 @@
 		bpf_compute_data_pointers(skb);
 		filter_res = BPF_PROG_RUN(filter, skb);
 	}
+	if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK)
+		skb_orphan(skb);
 	rcu_read_unlock();
 
 	/* A BPF program may overwrite the default action opcode.

diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index b14a7d4..238fb47 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include

@@ -82,21 +82,20 @@
 			$(if $(shell command -v -- $(c)gcc 2>/dev/null), $(c))))
 
 # output directory for tests below
-TMPOUT = $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_$$$$
+TMPOUT := $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/)
 
 # try-run
 # Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise)
 # Exit code chooses option. "$$TMP" serves as a temporary file and is
 # automatically cleaned up.
 try-run = $(shell set -e;		\
-	TMP=$(TMPOUT)/tmp;		\
-	TMPO=$(TMPOUT)/tmp.o;		\
-	mkdir -p $(TMPOUT);		\
-	trap "rm -rf $(TMPOUT)" EXIT;	\
+	TMP="$(TMPOUT).$$$$.tmp";	\
+	TMPO="$(TMPOUT).$$$$.o";	\
 	if ($(1)) >/dev/null 2>&1;	\
 	then echo "$(2)";		\
 	else echo "$(3)";		\
-	fi)
+	fi;				\
+	rm -f "$$TMP" "$$TMPO")
 
 # as-option
 # Usage: cflags-y += $(call as-option,-Wa$(comma)-isa=foo,)

diff --git a/security/Kconfig b/security/Kconfig
index 2a1a2d3..2a8bb1d 100644
--- a/security/Kconfig
+++ b/security/Kconfig

@@ -236,6 +236,7 @@
 source "security/apparmor/Kconfig"
 source "security/loadpin/Kconfig"
 source "security/yama/Kconfig"
+source "security/container/Kconfig"
 source "security/safesetid/Kconfig"
 source "security/lockdown/Kconfig"
 

diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
index af4c979b..269967c 100644
--- a/security/Kconfig.hardening
+++ b/security/Kconfig.hardening

@@ -19,13 +19,16 @@
 
 menu "Memory initialization"
 
-config CC_HAS_AUTO_VAR_INIT
+config CC_HAS_AUTO_VAR_INIT_PATTERN
 	def_bool $(cc-option,-ftrivial-auto-var-init=pattern)
 
+config CC_HAS_AUTO_VAR_INIT_ZERO
+	def_bool $(cc-option,-ftrivial-auto-var-init=zero -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang)
+
 choice
 	prompt "Initialize kernel stack variables at function entry"
 	default GCC_PLUGIN_STRUCTLEAK_BYREF_ALL if COMPILE_TEST && GCC_PLUGINS
-	default INIT_STACK_ALL if COMPILE_TEST && CC_HAS_AUTO_VAR_INIT
+	default INIT_STACK_ALL_PATTERN if COMPILE_TEST && CC_HAS_AUTO_VAR_INIT_PATTERN
 	default INIT_STACK_NONE
 	help
 	  This option enables initialization of stack variables at
@@ -88,9 +91,9 @@
 		  of uninitialized stack variable exploits and information
 		  exposures.
 
-	config INIT_STACK_ALL
+	config INIT_STACK_ALL_PATTERN
 		bool "0xAA-init everything on the stack (strongest)"
-		depends on CC_HAS_AUTO_VAR_INIT
+		depends on CC_HAS_AUTO_VAR_INIT_PATTERN
 		help
 		  Initializes everything on the stack with a 0xAA
 		  pattern. This is intended to eliminate all classes
@@ -98,6 +101,24 @@
 		  exposures, even variables that were warned to have been
 		  left uninitialized.
 
+		  Pattern initialization is known to provoke many existing bugs
+		  related to uninitialized locals, e.g. pointers receive
+		  non-NULL values, buffer sizes and indices are very big.
+
+	config INIT_STACK_ALL_ZERO
+		bool "zero-init everything on the stack (strongest and safest)"
+		depends on CC_HAS_AUTO_VAR_INIT_ZERO
+		help
+		  Initializes everything on the stack with a zero
+		  value. This is intended to eliminate all classes
+		  of uninitialized stack variable exploits and information
+		  exposures, even variables that were warned to have been
+		  left uninitialized.
+
+		  Zero initialization provides safe defaults for strings,
+		  pointers, indices and sizes, and is therefore
+		  more suitable as a security mitigation measure.
+
 endchoice
 
 config GCC_PLUGIN_STRUCTLEAK_VERBOSE

diff --git a/security/Makefile b/security/Makefile
index be1dd9d..a5d987b 100644
--- a/security/Makefile
+++ b/security/Makefile

@@ -10,6 +10,7 @@
 subdir-$(CONFIG_SECURITY_APPARMOR)	+= apparmor
 subdir-$(CONFIG_SECURITY_YAMA)		+= yama
 subdir-$(CONFIG_SECURITY_LOADPIN)	+= loadpin
+subdir-$(CONFIG_SECURITY_CONTAINER_MONITOR) += container
 subdir-$(CONFIG_SECURITY_SAFESETID)    += safesetid
 subdir-$(CONFIG_SECURITY_LOCKDOWN_LSM)	+= lockdown
 
@@ -30,6 +31,7 @@
 obj-$(CONFIG_SECURITY_SAFESETID)       += safesetid/
 obj-$(CONFIG_SECURITY_LOCKDOWN_LSM)	+= lockdown/
 obj-$(CONFIG_CGROUP_DEVICE)		+= device_cgroup.o
+obj-$(CONFIG_SECURITY_CONTAINER_MONITOR) += container/
 
 # Object integrity file lists
 subdir-$(CONFIG_INTEGRITY)		+= integrity

diff --git a/security/container/Kconfig b/security/container/Kconfig
new file mode 100644
index 0000000..72a51eb
--- /dev/null
+++ b/security/container/Kconfig

@@ -0,0 +1,17 @@
+config SECURITY_CONTAINER_MONITOR
+	bool "Monitor containerized processes"
+	depends on SECURITY
+	depends on MMU
+	depends on X86_64
+	select SECURITYFS
+	help
+	  Instrument the Linux kernel to collect more information about containers
+	  and identify security threats.
+
+config SECURITY_CONTAINER_MONITOR_DEBUG
+    bool "Enable debug pr_devel logs"
+	depends on SECURITY_CONTAINER_MONITOR
+	help
+	  Define DEBUG for CSM files to compile verbose debugging messages.
+
+	  Only for debugging/testing do not enable for production.

diff --git a/security/container/Makefile b/security/container/Makefile
new file mode 100644
index 0000000..9be2528
--- /dev/null
+++ b/security/container/Makefile

@@ -0,0 +1,16 @@
+PB_CCFLAGS := -DPB_SYSTEM_HEADER="<pbsystem.h>" \
+	-DPB_NO_ERRMSG \
+	-DPB_FIELD_16BIT \
+	-DPB_BUFFER_ONLY
+export PB_CCFLAGS
+
+subdir-$(CONFIG_SECURITY_CONTAINER_MONITOR) += protos
+
+obj-$(CONFIG_SECURITY_CONTAINER_MONITOR) += protos/
+obj-$(CONFIG_SECURITY_CONTAINER_MONITOR) += monitor.o pb.o process.o pipe.o
+
+ccflags-y := -I$(srctree)/security/container/protos \
+	-I$(srctree)/security/container/protos/nanopb \
+	-I$(srctree)/fs \
+	$(PB_CCFLAGS)
+ccflags-$(CONFIG_SECURITY_CONTAINER_MONITOR_DEBUG) += -DDEBUG

diff --git a/security/container/monitor.c b/security/container/monitor.c
new file mode 100644
index 0000000..4d3a9d6
--- /dev/null
+++ b/security/container/monitor.c

@@ -0,0 +1,747 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Container Security Monitor module
+ *
+ * Copyright (c) 2018 Google, Inc
+ */
+
+#include "monitor.h"
+#include "process.h"
+
+#include <linux/audit.h>
+#include <linux/file.h>
+#include <linux/lsm_hooks.h>
+#include <linux/module.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/poll.h>
+#include <linux/rwsem.h>
+#include <linux/seq_file.h>
+#include <linux/string.h>
+#include <linux/sysctl.h>
+
+/* protects csm_*_enabled and configurations. */
+DECLARE_RWSEM(csm_rwsem_config);
+
+/* queue used for poll wait on config changes. */
+static DECLARE_WAIT_QUEUE_HEAD(config_wait);
+
+/* increase each time a new configuration is applied. */
+static unsigned long config_version;
+
+/* Stats gathered from the LSM. */
+struct container_stats csm_stats;
+
+struct container_stats_mapping {
+	const char *key;
+	size_t *value;
+};
+
+/* Key value pair mapping for the sysfs entry. */
+struct container_stats_mapping csm_stats_mapping[] = {
+	{ "ProtoEncodingFailed", &csm_stats.proto_encoding_failed },
+	{ "WorkQueueFailed", &csm_stats.workqueue_failed },
+	{ "EventWritingFailed", &csm_stats.event_writing_failed },
+	{ "SizePickingFailed", &csm_stats.size_picking_failed },
+	{ "PipeAlreadyOpened", &csm_stats.pipe_already_opened },
+};
+
+/*
+ * Is monitoring enabled? Defaults to disabled.
+ * These variables might be used without locking csm_rwsem_config to check if an
+ * LSM hook can bail quickly. The semaphore is taken later to ensure CSM is
+ * still enabled.
+ *
+ * csm_enabled is true if any collector is enabled.
+ */
+bool csm_enabled;
+static bool csm_container_enabled;
+bool csm_execute_enabled;
+bool csm_memexec_enabled;
+
+/* securityfs control files */
+static struct dentry *csm_dir;
+static struct dentry *csm_enabled_file;
+static struct dentry *csm_container_file;
+static struct dentry *csm_config_file;
+static struct dentry *csm_config_vers_file;
+static struct dentry *csm_pipe_file;
+static struct dentry *csm_stats_file;
+
+/* Pipes to forward data to user-mode. */
+DECLARE_RWSEM(csm_rwsem_pipe);
+static struct file *csm_user_read_pipe;
+struct file *csm_user_write_pipe;
+
+/* Option to disable the CSM features at boot. */
+static bool cmdline_boot_disabled;
+bool cmdline_boot_vsock_enabled;
+
+/* Options disabled by default. */
+static bool cmdline_boot_pipe_enabled;
+static bool cmdline_boot_config_enabled;
+
+/* Option to fully enable the LSM at boot for automated testing. */
+static bool cmdline_default_enabled;
+
+/* Parse the "csm.disabled" boot parameter into cmdline_boot_disabled. */
+static int csm_boot_disabled_setup(char *str)
+{
+	return kstrtobool(str, &cmdline_boot_disabled);
+}
+early_param("csm.disabled", csm_boot_disabled_setup);
+
+/* Parse the "csm.default.enabled" boot parameter into cmdline_default_enabled. */
+static int csm_default_enabled_setup(char *str)
+{
+	return kstrtobool(str, &cmdline_default_enabled);
+}
+early_param("csm.default.enabled", csm_default_enabled_setup);
+
+/* Parse the "csm.vsock.enabled" boot parameter into cmdline_boot_vsock_enabled. */
+static int csm_boot_vsock_enabled_setup(char *str)
+{
+	return kstrtobool(str, &cmdline_boot_vsock_enabled);
+}
+early_param("csm.vsock.enabled", csm_boot_vsock_enabled_setup);
+
+/* Parse the "csm.pipe.enabled" boot parameter into cmdline_boot_pipe_enabled. */
+static int csm_boot_pipe_enabled_setup(char *str)
+{
+	return kstrtobool(str, &cmdline_boot_pipe_enabled);
+}
+early_param("csm.pipe.enabled", csm_boot_pipe_enabled_setup);
+
+/* Parse the "csm.config.enabled" boot parameter into cmdline_boot_config_enabled. */
+static int csm_boot_config_enabled_setup(char *str)
+{
+	return kstrtobool(str, &cmdline_boot_config_enabled);
+}
+early_param("csm.config.enabled", csm_boot_config_enabled_setup);
+
+/*
+ * Tell whether the read end of the user pipe has more than one reader.
+ *
+ * Returns false when no pipe exists or its pipe info cannot be obtained.
+ *
+ * NOTE(review): this asserts csm_rwsem_config, yet csm_dup_pipe() reaches it
+ * through close_pipe_files() while holding only csm_rwsem_pipe - confirm the
+ * intended locking.
+ */
+static bool pipe_in_use(void)
+{
+	struct pipe_inode_info *info;
+
+	lockdep_assert_held_write(&csm_rwsem_config);
+
+	if (!csm_user_read_pipe)
+		return false;
+
+	info = get_pipe_info(csm_user_read_pipe);
+	if (!info)
+		return false;
+
+	return READ_ONCE(info->readers) > 1;
+}
+
+/*
+ * Drop the kernel references on both ends of the user pipe.
+ *
+ * If the pipe is still in use the call fails with -EBUSY, unless @force is
+ * true, in which case a warning is logged and the pipe is closed anyway.
+ * Returns 0 otherwise, including when there is no pipe to close.
+ */
+int close_pipe_files(bool force)
+{
+	if (!csm_user_read_pipe)
+		return 0;
+
+	if (pipe_in_use()) {
+		/* Pipe is still used. */
+		if (!force)
+			return -EBUSY;
+		pr_warn("pipe is closed while it is still being used.\n");
+	}
+
+	fput(csm_user_read_pipe);
+	fput(csm_user_write_pipe);
+	csm_user_read_pipe = NULL;
+	csm_user_write_pipe = NULL;
+
+	return 0;
+}
+
+/*
+ * Apply a decoded configuration request: update the collector enable flags,
+ * replace the execute collector settings, bump config_version and wake the
+ * waiters on config_wait.
+ *
+ * Must be called with csm_rwsem_config held for write. When the execute
+ * collector is enabled, ownership of req->execute_config.envp_allowlist.arg
+ * moves to csm_execute_config and the pointer in @req is cleared; otherwise
+ * the pointer in @req is left untouched.
+ */
+static void csm_update_config(schema_ConfigurationRequest *req)
+{
+	schema_ExecuteCollectorConfig *econf;
+	size_t i;
+	bool enumerate_processes = false;
+
+	/* Expect the lock to be held for write before this call. */
+	lockdep_assert_held_write(&csm_rwsem_config);
+
+	/* This covers the scenario where a client is connected and the config
+	 * transitions the execute collector from disabled to enabled. In that
+	 * case there may have been execute events not sent. So they are
+	 * enumerated.
+	 */
+	if (!csm_execute_enabled && req->execute_config.enabled &&
+	    pipe_in_use())
+		enumerate_processes = true;
+
+	csm_container_enabled = req->container_config.enabled;
+	csm_execute_enabled = req->execute_config.enabled;
+	csm_memexec_enabled = req->memexec_config.enabled;
+
+	/* csm_enabled is true if any collector is enabled. */
+	csm_enabled = csm_container_enabled || csm_execute_enabled ||
+		csm_memexec_enabled;
+
+	/* Clean-up existing configurations. */
+	kfree(csm_execute_config.envp_allowlist);
+	memset(&csm_execute_config, 0, sizeof(csm_execute_config));
+
+	if (csm_execute_enabled) {
+		econf = &req->execute_config;
+		csm_execute_config.argv_limit = econf->argv_limit;
+		csm_execute_config.envp_limit = econf->envp_limit;
+
+		/* Swap the allowlist so it is not freed on return. */
+		csm_execute_config.envp_allowlist = econf->envp_allowlist.arg;
+		econf->envp_allowlist.arg = NULL;
+	}
+
+	/* Reset all stats and close pipe if disabled. */
+	if (!csm_enabled) {
+		for (i = 0; i < ARRAY_SIZE(csm_stats_mapping); i++)
+			*csm_stats_mapping[i].value = 0;
+
+		/* force == true: close even if a reader is still attached. */
+		close_pipe_files(true);
+	}
+
+	/* Publish the new version before waking pollers that compare it. */
+	config_version++;
+	if (enumerate_processes)
+		csm_enumerate_processes();
+	wake_up(&config_wait);
+}
+
+/*
+ * Decode a serialized schema_ConfigurationRequest from @data (@size bytes)
+ * and apply it while holding csm_rwsem_config for write.
+ *
+ * Returns 0 on success or -EINVAL when the buffer cannot be decoded.
+ */
+int csm_update_config_from_buffer(void *data, size_t size)
+{
+	schema_ConfigurationRequest c = {};
+	pb_istream_t istream;
+
+	c.execute_config.envp_allowlist.funcs.decode = pb_decode_string_array;
+
+	istream = pb_istream_from_buffer(data, size);
+	if (!pb_decode(&istream, schema_ConfigurationRequest_fields, &c)) {
+		kfree(c.execute_config.envp_allowlist.arg);
+		return -EINVAL;
+	}
+
+	down_write(&csm_rwsem_config);
+	csm_update_config(&c);
+	up_write(&csm_rwsem_config);
+
+	/*
+	 * csm_update_config() takes ownership of the allowlist (and clears
+	 * this pointer) only when the execute collector is enabled. Release
+	 * whatever is left so a request that carries an allowlist with the
+	 * collector disabled does not leak it. kfree(NULL) is a no-op.
+	 */
+	kfree(c.execute_config.envp_allowlist.arg);
+
+	return 0;
+}
+
+/*
+ * Write handler for the "config" file. A single write starting at offset 0
+ * carries one complete serialized configuration request. Requires
+ * CAP_SYS_ADMIN. Returns @count on success or a negative errno.
+ */
+static ssize_t csm_config_write(struct file *file, const char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	void *kbuf;
+	ssize_t ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* No partial writes. */
+	if (*ppos != 0)
+		return -EINVAL;
+
+	/* Decode from a kernel copy of the user buffer. */
+	kbuf = memdup_user(buf, count);
+	if (IS_ERR(kbuf))
+		return PTR_ERR(kbuf);
+
+	ret = csm_update_config_from_buffer(kbuf, count);
+	kfree(kbuf);
+
+	if (ret)
+		return ret;
+	return count;
+}
+
+static const struct file_operations csm_config_fops = {
+	.write = csm_config_write,
+};
+
+/*
+ * Enable every collector with the default configuration (no argv/envp
+ * limits). Caller must hold csm_rwsem_config for write.
+ */
+static void csm_enable(void)
+{
+	/* Default configuration */
+	schema_ConfigurationRequest req = {
+		.container_config = { .enabled = true },
+		.execute_config = {
+			.enabled = true,
+			.argv_limit = UINT_MAX,
+			.envp_limit = UINT_MAX,
+		},
+		.memexec_config = { .enabled = true },
+	};
+
+	/* Expect the lock to be held for write before this call. */
+	lockdep_assert_held_write(&csm_rwsem_config);
+
+	csm_update_config(&req);
+}
+
+/*
+ * Disable every collector by applying an all-zero configuration request.
+ * Caller must hold csm_rwsem_config for write.
+ */
+static void csm_disable(void)
+{
+	/* Zero configuration disable all collectors. */
+	schema_ConfigurationRequest all_off = {};
+
+	/* Expect the lock to be held for write before this call. */
+	lockdep_assert_held_write(&csm_rwsem_config);
+
+	csm_update_config(&all_off);
+	pr_info("disabled\n");
+}
+
+/* Read handler for the "enabled" file: yields "1\n" or "0\n". */
+static ssize_t csm_enabled_read(struct file *file, char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	const char out[2] = { csm_enabled ? '1' : '0', '\n' };
+
+	return simple_read_from_buffer(buf, count, ppos, out, sizeof(out));
+}
+
+/*
+ * Write handler for the "enabled" file. Accepts a boolean string in a single
+ * write at offset 0 (at most PAGE_SIZE bytes) and enables or disables all
+ * collectors accordingly. Requires CAP_SYS_ADMIN.
+ */
+static ssize_t csm_enabled_write(struct file *file, const char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	bool want_enabled;
+	int rc;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!count || count > PAGE_SIZE || *ppos)
+		return -EINVAL;
+
+	rc = kstrtobool_from_user(buf, count, &want_enabled);
+	if (rc)
+		return rc;
+
+	down_write(&csm_rwsem_config);
+	if (want_enabled)
+		csm_enable();
+	else
+		csm_disable();
+	up_write(&csm_rwsem_config);
+
+	return count;
+}
+
+static const struct file_operations csm_enabled_fops = {
+	.read = csm_enabled_read,
+	.write = csm_enabled_write,
+};
+
+/*
+ * Open handler for the "config_version" file. private_data remembers the
+ * last version handed to this opener; it starts at an all-ones value.
+ */
+static int csm_config_version_open(struct inode *inode, struct file *file)
+{
+	file->private_data = (void *)-1L;
+	return 0;
+}
+
+/*
+ * Read handler for the "config_version" file: returns the current version as
+ * a raw unsigned long and records it in private_data for the poll handler.
+ */
+static ssize_t csm_config_version_read(struct file *file, char __user *buf,
+				       size_t count, loff_t *ppos)
+{
+	unsigned long snapshot = config_version;
+
+	file->private_data = (void *)snapshot;
+
+	return simple_read_from_buffer(buf, count, ppos, &snapshot,
+				       sizeof(snapshot));
+}
+
+/*
+ * Poll handler for the "config_version" file: readable whenever the current
+ * version differs from the one last recorded for this opener.
+ */
+static __poll_t csm_config_version_poll(struct file *file,
+					struct poll_table_struct *poll_tab)
+{
+	/* Fast path: a newer version is already available. */
+	if ((unsigned long)file->private_data != config_version)
+		return EPOLLIN;
+
+	poll_wait(file, &config_wait, poll_tab);
+
+	/* Re-check after registering on the wait queue. */
+	return ((unsigned long)file->private_data != config_version) ?
+		EPOLLIN : 0;
+}
+
+static const struct file_operations csm_config_version_fops = {
+	.open = csm_config_version_open,
+	.read = csm_config_version_read,
+	.poll = csm_config_version_poll,
+};
+
+/*
+ * Open handler for the "pipe" file. Requires CAP_SYS_ADMIN and fails with
+ * -EAGAIN while no collector is enabled.
+ */
+static int csm_pipe_open(struct inode *inode, struct file *file)
+{
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return csm_enabled ? 0 : -EAGAIN;
+}
+
+/*
+ * Open a new struct file on the same path, with the same flags and
+ * credentials as @file. Similar to file_clone_open that is available only in
+ * 4.19 and up.
+ */
+static inline struct file *pipe_clone_open(struct file *file)
+{
+	const struct path *path = &file->f_path;
+
+	return dentry_open(path, file->f_flags, file->f_cred);
+}
+
+/*
+ * Replace the user pipe with a fresh one and return a clone of its read end.
+ *
+ * Fails with the error from close_pipe_files() (counted in the
+ * pipe_already_opened stat) when the current pipe is still in use, or with
+ * the error from create_pipe_files(). Runs under csm_rwsem_pipe held for
+ * write. Returns a struct file pointer or an ERR_PTR().
+ */
+static struct file *csm_dup_pipe(void)
+{
+	const long wanted_size = 1024 * PAGE_SIZE;
+	struct file *ends[2] = {NULL, NULL};
+	struct file *clone;
+	long resized;
+	int rc;
+
+	down_write(&csm_rwsem_pipe);
+
+	rc = close_pipe_files(false);
+	if (rc) {
+		clone = ERR_PTR(rc);
+		csm_stats.pipe_already_opened++;
+		goto unlock;
+	}
+
+	rc = create_pipe_files(ends, O_NONBLOCK);
+	if (rc) {
+		clone = ERR_PTR(rc);
+		goto unlock;
+	}
+
+	/*
+	 * Try to increase the pipe size to 1024 pages, if there is not
+	 * enough memory, pipes will stay unchanged.
+	 */
+	resized = pipe_fcntl(ends[0], F_SETPIPE_SZ, wanted_size);
+	if (resized != wanted_size)
+		pr_err("failed to resize pipe to 1024 pages, error: %ld, fallback to the default value\n",
+		       resized);
+
+	csm_user_read_pipe = ends[0];
+	csm_user_write_pipe = ends[1];
+
+	/* Clone the file so we can track if the reader is still used. */
+	clone = pipe_clone_open(csm_user_read_pipe);
+
+unlock:
+	up_write(&csm_rwsem_pipe);
+	return clone;
+}
+
+/*
+ * Read handler for the "pipe" file. Creates a new user pipe, installs a clone
+ * of its read end as a file descriptor in the calling process and returns
+ * that descriptor number (a raw int) through @buf.
+ *
+ * Returns the number of bytes copied on success, -EINVAL for a read that is
+ * not at offset 0 or is too short to hold the descriptor, or the error from
+ * descriptor reservation / pipe creation.
+ */
+static ssize_t csm_pipe_read(struct file *file, char __user *buf,
+				       size_t count, loff_t *ppos)
+{
+	int fd;
+	ssize_t err;
+	struct file *local_pipe;
+
+	/* No partial reads. */
+	if (*ppos != 0)
+		return -EINVAL;
+
+	/* Reserve the descriptor number first; it is only installed at the end. */
+	fd = get_unused_fd_flags(0);
+	if (fd < 0)
+		return fd;
+
+	local_pipe = csm_dup_pipe();
+	if (IS_ERR(local_pipe)) {
+		err = PTR_ERR(local_pipe);
+		local_pipe = NULL;
+		goto error;
+	}
+
+	err = simple_read_from_buffer(buf, count, ppos, &fd, sizeof(fd));
+	if (err < 0)
+		goto error;
+
+	/* Negative values were handled above, so this only catches short copies. */
+	if (err < sizeof(fd)) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	/* Install the file descriptor when we know everything succeeded. */
+	fd_install(fd, local_pipe);
+
+	csm_enumerate_processes();
+
+	return err;
+
+error:
+	/* Undo in reverse order: drop the cloned file, then the reserved fd. */
+	if (local_pipe)
+		fput(local_pipe);
+	put_unused_fd(fd);
+	return err;
+}
+
+
+static const struct file_operations csm_pipe_fops = {
+	.open = csm_pipe_open,
+	.read = csm_pipe_read,
+};
+
+/* Install the nanopb decode callbacks for the callback fields of @container. */
+static void set_container_decode_callbacks(schema_Container *container)
+{
+	/* Repeated string field. */
+	container->labels.funcs.decode = pb_decode_string_array;
+
+	/* Single string fields. */
+	container->container_image_uri.funcs.decode = pb_decode_string_field;
+	container->container_name.funcs.decode = pb_decode_string_field;
+	container->pod_name.funcs.decode = pb_decode_string_field;
+	container->pod_namespace.funcs.decode = pb_decode_string_field;
+}
+
+/* Install the nanopb encode callbacks for the callback fields of @container. */
+static void set_container_encode_callbacks(schema_Container *container)
+{
+	/* Repeated string field. */
+	container->labels.funcs.encode = pb_encode_string_array;
+
+	/* Single string fields. */
+	container->container_image_uri.funcs.encode = pb_encode_string_field;
+	container->container_name.funcs.encode = pb_encode_string_field;
+	container->pod_name.funcs.encode = pb_encode_string_field;
+	container->pod_namespace.funcs.encode = pb_encode_string_field;
+}
+
+/*
+ * Release the callback arguments of the string fields of @container.
+ * The init_uuid argument is not touched here.
+ */
+static void free_container_callbacks_args(schema_Container *container)
+{
+	kfree(container->labels.arg);
+	kfree(container->container_image_uri.arg);
+	kfree(container->container_name.arg);
+	kfree(container->pod_name.arg);
+	kfree(container->pod_namespace.arg);
+}
+
+/*
+ * Write handler for the "container" file.
+ *
+ * A single write at offset 0 carries a serialized schema_ContainerReport.
+ * The report must name a pid and must leave container_id at 0; the
+ * container id is looked up from the task's audit container id, the uuid of
+ * the process is attached, and the result is forwarded as a container event.
+ *
+ * Returns @count on success, -EAGAIN when the container collector is
+ * disabled, -EINVAL for a partial write or malformed report, -ENOENT when
+ * the pid is unknown or has no container id, -ENOMEM on allocation failure,
+ * or the error from get_process_uuid_by_pid() / csm_sendeventproto().
+ */
+static ssize_t csm_container_write(struct file *file, const char __user *buf,
+				   size_t count, loff_t *ppos)
+{
+	ssize_t err = 0;
+	void *mem;
+	u64 cid = AUDIT_CID_UNSET;
+	pb_istream_t istream;
+	struct task_struct *task;
+	schema_ContainerReport report = {};
+	schema_Event event = {};
+	schema_Container *container;
+	char *uuid = NULL;
+
+	/* Notify that this collector is not yet enabled. */
+	if (!csm_container_enabled)
+		return -EAGAIN;
+
+	/* No partial writes. */
+	if (*ppos != 0)
+		return -EINVAL;
+
+	/* Duplicate user memory to safely parse protobuf. */
+	mem = memdup_user(buf, count);
+	if (IS_ERR(mem))
+		return PTR_ERR(mem);
+
+	/* Callback to decode string in protobuf. */
+	set_container_decode_callbacks(&report.container);
+
+	istream = pb_istream_from_buffer(mem, count);
+	if (!pb_decode(&istream, schema_ContainerReport_fields, &report)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Check protobuf is as expected */
+	if (report.pid == 0 ||
+	    report.container.container_id != 0) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Find if the process id is linked to an existing container-id. */
+	rcu_read_lock();
+	task = find_task_by_pid_ns(report.pid, &init_pid_ns);
+	if (task) {
+		cid = audit_get_contid(task);
+		if (cid == AUDIT_CID_UNSET)
+			err = -ENOENT;
+	} else {
+		err = -ENOENT;
+	}
+	rcu_read_unlock();
+
+	if (err)
+		goto out;
+
+	uuid = kzalloc(PROCESS_UUID_SIZE, GFP_KERNEL);
+	if (!uuid) {
+		/* Report the failure instead of returning 0 bytes written. */
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* Provide the uuid for the top process of the container. */
+	err = get_process_uuid_by_pid(report.pid, uuid, PROCESS_UUID_SIZE);
+	if (err)
+		goto out;
+
+	/* Correct the container-id and feed the event to pipe */
+	report.has_container = true;
+	report.container.container_id = cid;
+	report.container.init_uuid.funcs.encode = pb_encode_uuid_field;
+	report.container.init_uuid.arg = uuid;
+	container = &event.event.container.container;
+	*container = report.container;
+
+	/* Use encode callback to generate the final proto. */
+	set_container_encode_callbacks(container);
+
+	event.which_event = schema_Event_container_tag;
+	event.event.container.has_container = true;
+
+	err = csm_sendeventproto(schema_Event_fields, &event);
+	if (!err)
+		err = count;
+
+out:
+	/*
+	 * Free any allocated nanopb callback arguments. The event shares
+	 * these pointers with the report, so they are released only once,
+	 * through the report.
+	 */
+	free_container_callbacks_args(&report.container);
+	kfree(uuid);
+	kfree(mem);
+	return err;
+}
+
+static const struct file_operations csm_container_fops = {
+	.write = csm_container_write,
+};
+
+/* seq_file show callback: print one "key:<tab>value" line per counter. */
+static int csm_show_stats(struct seq_file *p, void *v)
+{
+	const struct container_stats_mapping *entry = csm_stats_mapping;
+	const struct container_stats_mapping *end =
+		csm_stats_mapping + ARRAY_SIZE(csm_stats_mapping);
+
+	for (; entry < end; entry++)
+		seq_printf(p, "%s:\t%zu\n", entry->key, *entry->value);
+
+	return 0;
+}
+
+/*
+ * Open handler for the "stats" file. Sizes the seq_file buffer up front from
+ * the worst-case length of every line so the output fits in one pass.
+ */
+static int csm_stats_open(struct inode *inode, struct file *file)
+{
+	/*
+	 * Per line, besides the key:
+	 * - 3 additional chars :\t\n
+	 * - longest unsigned 64-bit integer.
+	 */
+	const size_t per_line_extra = 3 + sizeof("18446744073709551615");
+	size_t needed = 1; /* Start at one for the null byte. */
+	size_t idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(csm_stats_mapping); idx++)
+		needed += strlen(csm_stats_mapping[idx].key) + per_line_extra;
+
+	return single_open_size(file, csm_show_stats, NULL, needed);
+}
+
+static const struct file_operations csm_stats_fops = {
+	.open		= csm_stats_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/*
+ * inode_setxattr hook: while monitoring is enabled, refuse with -EPERM any
+ * attempt to set the XATTR_SECURITY_CSM attribute; allow everything else.
+ */
+static int csm_setxattr(struct dentry *dentry, const char *name,
+			const void *value, size_t size, int flags)
+{
+	if (!csm_enabled)
+		return 0;
+
+	if (strcmp(name, XATTR_SECURITY_CSM) != 0)
+		return 0;
+
+	return -EPERM;
+}
+
+/* LSM hooks registered by csm_init() under the name "csm". */
+static struct security_hook_list csm_hooks[] __lsm_ro_after_init = {
+	/* Track process execution. */
+	LSM_HOOK_INIT(bprm_check_security, csm_bprm_check_security),
+	LSM_HOOK_INIT(task_post_alloc, csm_task_post_alloc),
+	LSM_HOOK_INIT(task_exit, csm_task_exit),
+
+	/* Track memory execution */
+	LSM_HOOK_INIT(file_mprotect, csm_mprotect),
+	LSM_HOOK_INIT(mmap_file, csm_mmap_file),
+
+	/* Track file modification provenance. */
+	LSM_HOOK_INIT(file_pre_free_security, csm_file_pre_free),
+
+	/* Block modifying the csm xattr. */
+	LSM_HOOK_INIT(inode_setxattr, csm_setxattr),
+};
+
+/*
+ * Create the securityfs control files, register the LSM hooks and, when
+ * "csm.default.enabled" was set at boot, enable every collector.
+ *
+ * Does nothing when "csm.disabled" was set at boot. The "config" and "pipe"
+ * files are only created when enabled through their boot parameters. On
+ * failure every entry created so far is removed and the error is returned.
+ */
+static int __init csm_init(void)
+{
+	int err;
+
+	if (cmdline_boot_disabled)
+		return 0;
+
+	if (cmdline_boot_vsock_enabled)
+		pr_debug("vsock is deprecated, but was enabled at boot\n");
+
+	csm_dir = securityfs_create_dir("container_monitor", NULL);
+	if (IS_ERR(csm_dir)) {
+		err = PTR_ERR(csm_dir);
+		goto error;
+	}
+
+	csm_enabled_file = securityfs_create_file("enabled", 0644, csm_dir,
+						  NULL, &csm_enabled_fops);
+	if (IS_ERR(csm_enabled_file)) {
+		err = PTR_ERR(csm_enabled_file);
+		goto error_rmdir;
+	}
+
+	csm_container_file = securityfs_create_file("container", 0200, csm_dir,
+						    NULL, &csm_container_fops);
+	if (IS_ERR(csm_container_file)) {
+		err = PTR_ERR(csm_container_file);
+		goto error_rm_enabled;
+	}
+
+	csm_config_vers_file = securityfs_create_file("config_version", 0400,
+						      csm_dir, NULL,
+						      &csm_config_version_fops);
+	if (IS_ERR(csm_config_vers_file)) {
+		err = PTR_ERR(csm_config_vers_file);
+		goto error_rm_container;
+	}
+
+	if (cmdline_boot_config_enabled) {
+		csm_config_file = securityfs_create_file("config", 0200,
+							 csm_dir, NULL,
+							 &csm_config_fops);
+		if (IS_ERR(csm_config_file)) {
+			err = PTR_ERR(csm_config_file);
+			goto error_rm_config_vers;
+		}
+	}
+
+	if (cmdline_boot_pipe_enabled) {
+		csm_pipe_file = securityfs_create_file("pipe", 0400, csm_dir,
+						       NULL, &csm_pipe_fops);
+		if (IS_ERR(csm_pipe_file)) {
+			err = PTR_ERR(csm_pipe_file);
+			goto error_rm_config;
+		}
+	}
+
+	csm_stats_file = securityfs_create_file("stats", 0400, csm_dir,
+						 NULL, &csm_stats_fops);
+	if (IS_ERR(csm_stats_file)) {
+		err = PTR_ERR(csm_stats_file);
+		goto error_rm_pipe;
+	}
+
+	pr_debug("created securityfs control files\n");
+
+	security_add_hooks(csm_hooks, ARRAY_SIZE(csm_hooks), "csm");
+	pr_debug("registered hooks\n");
+
+	/* Off-by-default, only used for testing images. */
+	if (cmdline_default_enabled) {
+		down_write(&csm_rwsem_config);
+		csm_enable();
+		up_write(&csm_rwsem_config);
+	}
+
+	return 0;
+
+	/* Unwind in reverse creation order; optional files only if created. */
+error_rm_pipe:
+	if (cmdline_boot_pipe_enabled)
+		securityfs_remove(csm_pipe_file);
+error_rm_config:
+	if (cmdline_boot_config_enabled)
+		securityfs_remove(csm_config_file);
+error_rm_config_vers:
+	securityfs_remove(csm_config_vers_file);
+error_rm_container:
+	securityfs_remove(csm_container_file);
+error_rm_enabled:
+	securityfs_remove(csm_enabled_file);
+error_rmdir:
+	securityfs_remove(csm_dir);
+error:
+	/* Terminate the message so it is not merged with the next log line. */
+	pr_warn("fs initialization error: %d\n", err);
+	return err;
+}
+
+late_initcall(csm_init);

diff --git a/security/container/monitor.h b/security/container/monitor.h
new file mode 100644
index 0000000..221d7f5
--- /dev/null
+++ b/security/container/monitor.h

@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Container Security Monitor module
+ *
+ * Copyright (c) 2018 Google, Inc
+ */
+
+#define pr_fmt(fmt)	"container-security-monitor: " fmt
+
+#include <linux/kernel.h>
+#include <linux/security.h>
+#include <linux/fs.h>
+#include <linux/rwsem.h>
+#include <linux/binfmts.h>
+#include <linux/xattr.h>
+#include <config.pb.h>
+#include <event.pb.h>
+#include <pb_encode.h>
+#include <pb_decode.h>
+
+#include "monitoring_protocol.h"
+
+/* Part of the CSM configuration response. */
+#define CSM_VERSION 1
+
+/* protects csm_*_enabled and configurations. */
+extern struct rw_semaphore csm_rwsem_config;
+
+/*
+ * Is monitoring enabled? Defaults to disabled.
+ * These variables might be used as gates without locking (as processor ensures
+ * valid proper access for native scalar values) so it can bail quickly.
+ */
+extern bool csm_enabled;
+extern bool csm_execute_enabled;
+extern bool csm_memexec_enabled;
+
+/* Configuration options for execute collector. */
+struct execute_config {
+	/* Limits copied from the execute collector configuration request. */
+	size_t argv_limit;
+	size_t envp_limit;
+	/*
+	 * Buffer taken over from the decoded configuration request and
+	 * released with kfree() when the configuration is replaced.
+	 * NOTE(review): presumably a sequence of NUL-terminated names as
+	 * produced by pb_decode_string_array() - confirm.
+	 */
+	char *envp_allowlist;
+};
+
+extern struct execute_config csm_execute_config;
+
+/* pipe to forward events to user-mode. */
+extern struct rw_semaphore csm_rwsem_pipe;
+extern struct file *csm_user_write_pipe;
+
+/*
+ * Stats on LSM events. Each counter is exported through the securityfs
+ * "stats" file and is reset to zero when monitoring is disabled.
+ */
+struct container_stats {
+	size_t proto_encoding_failed;	/* exported as "ProtoEncodingFailed" */
+	size_t event_writing_failed;	/* exported as "EventWritingFailed" */
+	size_t workqueue_failed;	/* exported as "WorkQueueFailed" */
+	size_t size_picking_failed;	/* exported as "SizePickingFailed" */
+	size_t pipe_already_opened;	/* exported as "PipeAlreadyOpened" */
+};
+
+extern struct container_stats csm_stats;
+
+/* Streams file numbers are unknown from the kernel */
+#define STDIN_FILENO	0
+#define STDOUT_FILENO	1
+#define STDERR_FILENO	2
+
+/* security attribute for file provenance. */
+#define XATTR_SECURITY_CSM XATTR_SECURITY_PREFIX "csm"
+
+/* monitor functions */
+int csm_update_config_from_buffer(void *data, size_t size);
+
+/* send event to userland */
+int csm_sendeventproto(const pb_msgdesc_t *fields, schema_Event *event);
+
+/* process events functions */
+int csm_bprm_check_security(struct linux_binprm *bprm);
+void csm_task_exit(struct task_struct *task);
+void csm_task_post_alloc(struct task_struct *task);
+int get_process_uuid_by_pid(pid_t pid_nr, char *buffer, size_t size);
+
+/* memory execution events functions */
+int csm_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
+					  unsigned long prot);
+int csm_mmap_file(struct file *file, unsigned long reqprot,
+				  unsigned long prot, unsigned long flags);
+
+/* Tracking of file modification provenance. */
+void csm_file_pre_free(struct file *file);
+
+/* nano functions */
+bool pb_encode_string_field(pb_ostream_t *stream, const pb_field_t *field,
+			    void * const *arg);
+bool pb_decode_string_field(pb_istream_t *stream, const pb_field_t *field,
+		      void **arg);
+ssize_t pb_encode_string_field_limit(pb_ostream_t *stream,
+				     const pb_field_t *field,
+				     void * const *arg, size_t limit);
+bool pb_encode_string_array(pb_ostream_t *stream, const pb_field_t *field,
+			    void * const *arg);
+bool pb_decode_string_array(pb_istream_t *stream, const pb_field_t *field,
+			    void **arg);
+bool pb_encode_uuid_field(pb_ostream_t *stream, const pb_field_t *field,
+			  void * const *arg);
+bool pb_encode_ip4(pb_ostream_t *stream, const pb_field_t *field,
+		   void * const *arg);
+bool pb_encode_ip6(pb_ostream_t *stream, const pb_field_t *field,
+		   void * const *arg);
+

diff --git a/security/container/monitoring_protocol.h b/security/container/monitoring_protocol.h
new file mode 100644
index 0000000..dbdfc9c
--- /dev/null
+++ b/security/container/monitoring_protocol.h

@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+/* Container security monitoring protocol definitions */
+
+/* Guard against multiple inclusion, which would redefine the types below. */
+#ifndef CSM_MONITORING_PROTOCOL_H
+#define CSM_MONITORING_PROTOCOL_H
+
+#include <linux/types.h>
+
+/* Values carried in csm_msg_hdr.msg_type. */
+enum csm_msgtype {
+	CSM_MSG_TYPE_HEARTBEAT = 1,
+	CSM_MSG_EVENT_PROTO = 2,
+	CSM_MSG_CONFIG_REQUEST_PROTO = 3,
+	CSM_MSG_CONFIG_RESPONSE_PROTO = 4,
+};
+
+/* Message header; both fields are little-endian 32-bit values. */
+struct csm_msg_hdr {
+	__le32 msg_type;
+	__le32 msg_length;
+};
+
+/* The process uuid is a 128-bits identifier */
+#define PROCESS_UUID_SIZE 16
+
+/*
+ * The entire structure forms the collision domain. The packed fields
+ * (4 + 8 + 4 bytes) overlay the PROCESS_UUID_SIZE-byte raw view.
+ */
+union process_uuid {
+	struct {
+		__u32 machineid;
+		__u64 start_time;
+		__u32 tgid;
+	} __attribute__((packed));
+	__u8 data[PROCESS_UUID_SIZE];
+};
+
+#endif /* CSM_MONITORING_PROTOCOL_H */

diff --git a/security/container/pb.c b/security/container/pb.c
new file mode 100644
index 0000000..1cc7ecf
--- /dev/null
+++ b/security/container/pb.c

@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Container Security Monitor module
+ *
+ * Copyright (c) 2018 Google, Inc
+ */
+
+#include "monitor.h"
+
+#include <linux/string.h>
+#include <net/tcp.h>
+#include <net/ipv6.h>
+
+/*
+ * Encode callback for a string field: writes the field tag followed by the
+ * NUL-terminated string found in *arg. A NULL string is not an error; the
+ * field is simply left out.
+ */
+bool pb_encode_string_field(pb_ostream_t *stream, const pb_field_t *field,
+			    void * const *arg)
+{
+	const uint8_t *text = (const uint8_t *)*arg;
+
+	if (text == NULL)
+		return true;
+
+	/* The payload is only written once the tag went through. */
+	if (pb_encode_tag_for_field(stream, field))
+		return pb_encode_string(stream, text, strlen(text));
+
+	return false;
+}
+
+/*
+ * Decode callback for a string field: copies everything left in the stream
+ * into a freshly allocated, NUL-terminated buffer stored in *arg.
+ * On failure *arg is NULL and false is returned.
+ */
+bool pb_decode_string_field(pb_istream_t *stream, const pb_field_t *field,
+			    void **arg)
+{
+	size_t len;
+	void *buf;
+
+	*arg = NULL;
+
+	len = stream->bytes_left;
+
+	/* One extra byte holds the terminator; refuse a wrapping length. */
+	if (len + 1 < len)
+		return false;
+
+	/* Zeroed allocation, so the trailing byte is already the terminator. */
+	buf = kzalloc(len + 1, GFP_KERNEL);
+	if (buf == NULL)
+		return false;
+
+	if (pb_read(stream, buf, len)) {
+		*arg = buf;
+		return true;
+	}
+
+	kfree(buf);
+	return false;
+}
+
+/*
+ * Encode callback for a repeated string field. *arg points to a sequence of
+ * NUL-terminated strings laid out back to back; the walk stops at the first
+ * empty string found after an entry. The first entry is always encoded.
+ */
+bool pb_encode_string_array(pb_ostream_t *stream, const pb_field_t *field,
+			    void * const *arg)
+{
+	char *cursor = (char *)*arg;
+
+	/* No list at all: nothing to encode. */
+	if (cursor == NULL)
+		return true;
+
+	for (;;) {
+		if (!pb_encode_string_field(stream, field,
+					    (void * const *) &cursor))
+			return false;
+
+		/* Step over the entry and its terminator. */
+		cursor += strlen(cursor) + 1;
+		if (*cursor == 0)
+			break;
+	}
+
+	return true;
+}
+
+/*
+ * Encode the string in *arg, truncated to at most limit bytes.
+ * Returns the number of bytes encoded (0 when the string is NULL) or
+ * -EINVAL when the tag or the payload could not be written.
+ */
+ssize_t pb_encode_string_field_limit(pb_ostream_t *stream,
+				     const pb_field_t *field,
+				     void * const *arg, size_t limit)
+{
+	char *text = (char *)*arg;
+	size_t count;
+
+	if (text == NULL)
+		return 0;
+
+	if (!pb_encode_tag_for_field(stream, field))
+		return -EINVAL;
+
+	/* Clamp the payload to the caller's budget. */
+	count = strlen(text);
+	if (limit < count)
+		count = limit;
+
+	if (pb_encode_string(stream, (uint8_t *)text, count))
+		return count;
+
+	return -EINVAL;
+}
+
+/*
+ * Decode callback for a repeated string field.
+ *
+ * *arg holds the list decoded so far (or NULL): strings stored back to back,
+ * terminated by an empty string. Each call appends the remaining bytes of
+ * the stream as a new entry and re-terminates the list.
+ *
+ * Returns false on size overflow, allocation failure or read failure. When
+ * the allocation fails *arg is left untouched; once it succeeded *arg
+ * points to the (possibly moved) buffer even if the read fails afterwards.
+ */
+bool pb_decode_string_array(pb_istream_t *stream, const pb_field_t *field,
+			    void **arg)
+{
+	size_t needed, used = 0;
+	char *data, *strs;
+
+	/* String length, and two null-bytes for the end of the list. */
+	needed = stream->bytes_left + 2;
+	if (needed < stream->bytes_left)
+		return false;
+
+	if (*arg) {
+		/* Calculate used space from the current list. */
+		strs = (char *)*arg;
+		do {
+			used += strlen(strs + used) + 1;
+		} while (strs[used] != 0);
+
+		/* Reject a total size that would wrap around. */
+		if (used + needed < needed)
+			return false;
+	}
+
+	data = krealloc(*arg, used + needed, GFP_KERNEL);
+	if (!data)
+		return false;
+
+	/* Will always be freed by the caller */
+	*arg = data;
+
+	/* Reset the new part of the buffer. */
+	memset(data + used, 0, needed);
+
+	/* Read what's in the stream buffer only. */
+	if (!pb_read(stream, data + used, stream->bytes_left))
+		return false;
+
+	return true;
+}
+
+/*
+ * Encode length bytes starting at data as the payload of field.
+ * A NULL data pointer means the field is not set: nothing is written and
+ * success is reported.
+ */
+bool pb_encode_fixed_string(pb_ostream_t *stream, const pb_field_t *field,
+			    const uint8_t *data, size_t length)
+{
+	if (data == NULL)
+		return true;
+
+	/* The payload is attempted only after the tag was written. */
+	return pb_encode_tag_for_field(stream, field) &&
+	       pb_encode_string(stream, data, length);
+}
+
+
+/* Encode callback: *arg points to PROCESS_UUID_SIZE bytes of process uuid. */
+bool pb_encode_uuid_field(pb_ostream_t *stream, const pb_field_t *field,
+			  void * const *arg)
+{
+	const uint8_t *uuid = (const uint8_t *)*arg;
+
+	return pb_encode_fixed_string(stream, field, uuid, PROCESS_UUID_SIZE);
+}
+
+/* Encode callback: *arg points to a struct in_addr, written as raw bytes. */
+bool pb_encode_ip4(pb_ostream_t *stream, const pb_field_t *field,
+		   void * const *arg)
+{
+	const uint8_t *addr = (const uint8_t *)*arg;
+
+	return pb_encode_fixed_string(stream, field, addr,
+				      sizeof(struct in_addr));
+}
+
+/* Encode callback: *arg points to a struct in6_addr, written as raw bytes. */
+bool pb_encode_ip6(pb_ostream_t *stream, const pb_field_t *field,
+		   void * const *arg)
+{
+	const uint8_t *addr = (const uint8_t *)*arg;
+
+	return pb_encode_fixed_string(stream, field, addr,
+				      sizeof(struct in6_addr));
+}

diff --git a/security/container/pipe.c b/security/container/pipe.c
new file mode 100644
index 0000000..49f5fe8
--- /dev/null
+++ b/security/container/pipe.c

@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Container Security Monitor module
+ *
+ * Copyright (c) 2018 Google, Inc
+ */
+
+#include "monitor.h"
+
+#include <linux/pipe_fs_i.h>
+#include <linux/printk.h>
+#include <linux/ratelimit.h>
+#include <linux/uio.h>
+#include <linux/workqueue.h>
+
+/* csm protobuf work: handler run from the workqueue for each queued message */
+static void csm_sendmsg_pipe_handler(struct work_struct *work);
+
+/*
+ * csm message work container.
+ * One instance is allocated per encoded message: msg holds the serialized
+ * protobuf and pos_bytes_written the number of valid bytes in it.
+ */
+struct msg_work_data {
+	struct work_struct msg_work;
+	size_t pos_bytes_written;
+	char msg[];
+};
+
+/* Mutex to ensure sequential dumping of protos */
+static DEFINE_MUTEX(protodump);
+
+/*
+ * Write the vecs_size kvecs in vecs (total_length bytes overall) to the user
+ * pipe with a single vfs_iter_write() call.
+ *
+ * Returns the vfs_iter_write() result, or 0 without writing anything when no
+ * pipe is set up or when the pipe reports at most one reader.
+ */
+static ssize_t csm_user_pipe_write(struct kvec *vecs, size_t vecs_size,
+				   size_t total_length)
+{
+	ssize_t perr = 0;
+	struct iov_iter io = { };
+	loff_t pos = 0;
+	struct pipe_inode_info *pipe;
+	unsigned int readers;
+
+	/* Cheap unlocked check to avoid taking the semaphore for nothing. */
+	if (!csm_user_write_pipe)
+		return 0;
+
+	down_read(&csm_rwsem_pipe);
+
+	/* Check again now that the semaphore is held. */
+	if (csm_user_write_pipe == NULL)
+		goto end;
+
+	/* The pipe info is the same for reader and write files. */
+	pipe = get_pipe_info(csm_user_write_pipe);
+
+	/* If nobody is listening, don't write events. */
+	/* NOTE(review): one reader seems to be held by the module - confirm. */
+	readers = READ_ONCE(pipe->readers);
+	if (readers <= 1) {
+		WARN_ON(readers == 0);
+		goto end;
+	}
+
+
+	iov_iter_kvec(&io, ITER_KVEC|WRITE, vecs, vecs_size, total_length);
+
+	file_start_write(csm_user_write_pipe);
+	perr = vfs_iter_write(csm_user_write_pipe, &io, &pos, 0);
+	file_end_write(csm_user_write_pipe);
+
+end:
+	up_read(&csm_rwsem_pipe);
+	return perr;
+}
+
+/*
+ * Frame buf (len bytes) with a csm_msg_hdr of the given type and write both
+ * to the user pipe. The header length field covers header plus payload.
+ * Returns what csm_user_pipe_write() returned; a negative result is logged
+ * (rate limited) and counted in csm_stats.event_writing_failed.
+ */
+static int csm_sendmsg(int type, const void *buf, size_t len)
+{
+	struct csm_msg_hdr hdr = {
+		.msg_type = cpu_to_le32(type),
+		.msg_length = cpu_to_le32(sizeof(hdr) + len),
+	};
+	/* Header first, payload second, sent as one write. */
+	struct kvec vecs[] = {
+		{ .iov_base = &hdr, .iov_len = sizeof(hdr), },
+		{ .iov_base = (void *)buf, .iov_len = len, },
+	};
+	ssize_t written;
+
+	written = csm_user_pipe_write(vecs, ARRAY_SIZE(vecs),
+				      le32_to_cpu(hdr.msg_length));
+	if (written >= 0)
+		return written;
+
+	pr_warn_ratelimited("vfs_iter_write error (msg_type=%d, msg_length=%u): %zd\n",
+			    type, le32_to_cpu(hdr.msg_length), written);
+	csm_stats.event_writing_failed++;
+
+	return written;
+}
+
+/*
+ * Pick a buffer size for encoding src_struct.
+ * Known schema_Event variants get a fixed estimate; anything else falls back
+ * to the exact size computed by pb_get_encoded_size().
+ */
+static bool csm_get_expected_size(size_t *size, const pb_msgdesc_t *fields,
+				    const void *src_struct)
+{
+	size_t estimate = 0;
+
+	if (fields == schema_Event_fields) {
+		/* Size above 99% of the 100 containers tested running k8s. */
+		switch (((schema_Event *)src_struct)->which_event) {
+		case schema_Event_execute_tag:
+			estimate = 3344;
+			break;
+		case schema_Event_memexec_tag:
+			estimate = 176;
+			break;
+		case schema_Event_clone_tag:
+			estimate = 50;
+			break;
+		case schema_Event_exit_tag:
+			estimate = 30;
+			break;
+		}
+	}
+
+	/* Every estimate above is non-zero, so 0 means "no estimate". */
+	if (estimate) {
+		*size = estimate;
+		return true;
+	}
+
+	/* If unknown, do the pre-computation. */
+	return pb_get_encoded_size(size, fields, src_struct);
+}
+
+/*
+ * Allocate a work item with room for size payload bytes and encode
+ * src_struct into it.
+ * Returns the initialized work item, ERR_PTR(-ENOMEM) when the allocation
+ * fails, or ERR_PTR(-EINVAL) when the size overflows or encoding fails.
+ */
+static struct msg_work_data *csm_encodeproto(size_t size,
+					     const pb_msgdesc_t *fields,
+					     const void *src_struct)
+{
+	struct msg_work_data *item;
+	pb_ostream_t ostream;
+	size_t alloc_len = size + sizeof(*item);
+
+	/* Guard against the header size wrapping the total. */
+	if (alloc_len < size)
+		return ERR_PTR(-EINVAL);
+
+	item = kmalloc(alloc_len, GFP_KERNEL);
+	if (item == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	ostream = pb_ostream_from_buffer(item->msg, size);
+	if (pb_encode(&ostream, fields, src_struct)) {
+		INIT_WORK(&item->msg_work, csm_sendmsg_pipe_handler);
+		item->pos_bytes_written = ostream.bytes_written;
+		return item;
+	}
+
+	kfree(item);
+	return ERR_PTR(-EINVAL);
+}
+
+/*
+ * Serialize src_struct and queue it for asynchronous delivery on the pipe.
+ *
+ * The buffer is first sized with the estimate from csm_get_expected_size().
+ * If encoding fails with that size, the exact size is computed with
+ * pb_get_encoded_size() and encoding is retried once.
+ *
+ * Returns 0 when the message was queued, -EINVAL when a size could not be
+ * computed, or the error of the second csm_encodeproto() attempt. When
+ * schedule_work() refuses the item, the failure is counted and logged, the
+ * message is dropped and 0 is still returned.
+ *
+ * NOTE(review): type is not used here; the work handler always sends
+ * CSM_MSG_EVENT_PROTO.
+ */
+static int csm_sendproto(int type, const pb_msgdesc_t *fields,
+			 const void *src_struct)
+{
+	size_t size, previous_size;
+	struct msg_work_data *wd;
+
+	/* Use the expected size first. */
+	if (!csm_get_expected_size(&size, fields, src_struct))
+		return -EINVAL;
+
+	wd = csm_encodeproto(size, fields, src_struct);
+	if (IS_ERR(wd)) {
+		/* If it failed, retry with the exact size. */
+		csm_stats.size_picking_failed++;
+		previous_size = size;
+
+		if (!pb_get_encoded_size(&size, fields, src_struct))
+			return -EINVAL;
+
+		wd = csm_encodeproto(size, fields, src_struct);
+		if (IS_ERR(wd)) {
+			csm_stats.proto_encoding_failed++;
+			return PTR_ERR(wd);
+		}
+
+		/* size_t values are printed with %zu. */
+		pr_debug("size picking failed %zu vs %zu\n", previous_size,
+			 size);
+	}
+
+	/* The work handler takes care of cleanup, if successfully scheduled. */
+	if (likely(schedule_work(&wd->msg_work)))
+		return 0;
+
+	csm_stats.workqueue_failed++;
+	pr_err_ratelimited("Sent msg to workqueue unsuccessfully (assume dropped).\n");
+
+	/* Not queued, so the handler will never free it. */
+	kfree(wd);
+	return 0;
+}
+
+/*
+ * Workqueue handler: send the encoded event held by the work item as a
+ * CSM_MSG_EVENT_PROTO message, then release the item whatever the outcome.
+ */
+static void csm_sendmsg_pipe_handler(struct work_struct *work)
+{
+	struct msg_work_data *item;
+
+	item = container_of(work, struct msg_work_data, msg_work);
+
+	if (csm_sendmsg(CSM_MSG_EVENT_PROTO, item->msg,
+			item->pos_bytes_written) < 0)
+		pr_err_ratelimited("csm_sendmsg failed in work handler %s\n",
+				   __func__);
+
+	kfree(item);
+}
+
+/*
+ * Stamp event with the current real time and hand it to csm_sendproto().
+ * Returns -ENOTSUPP, leaving event untouched, when csm is not enabled.
+ */
+int csm_sendeventproto(const pb_msgdesc_t *fields, schema_Event *event)
+{
+	/* Last check before generating and sending an event. */
+	if (csm_enabled) {
+		event->timestamp = ktime_get_real_ns();
+		return csm_sendproto(CSM_MSG_EVENT_PROTO, fields, event);
+	}
+
+	return -ENOTSUPP;
+}

diff --git a/security/container/process.c b/security/container/process.c
new file mode 100644
index 0000000..0bf84c4
--- /dev/null
+++ b/security/container/process.c

@@ -0,0 +1,1167 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Container Security Monitor module
+ *
+ * Copyright (c) 2018 Google, Inc
+ */
+
+#include "monitor.h"
+
+#include <linux/atomic.h>
+#include <linux/audit.h>
+#include <linux/file.h>
+#include <linux/highmem.h>
+#include <linux/mempool.h>
+#include <linux/mm.h>
+#include <linux/mount.h>
+#include <linux/notifier.h>
+#include <linux/net.h>
+#include <linux/path.h>
+#include <linux/pid.h>
+#include <linux/pid_namespace.h>
+#include <linux/random.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/task.h>
+#include <linux/slab.h>
+#include <linux/socket.h>
+#include <linux/timekeeping.h>
+#include <linux/vmalloc.h>
+#include <linux/workqueue.h>
+#include <linux/xattr.h>
+#include <net/ipv6.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <overlayfs/overlayfs.h>
+#include <uapi/linux/magic.h>
+#include <uapi/asm/mman.h>
+
+/* Configuration options for execute collector. */
+struct execute_config csm_execute_config;
+
+/*
+ * unique atomic value for the machine boot instance
+ * (0 means "not generated yet", see get_machine_id())
+ */
+static atomic_t machine_rand = ATOMIC_INIT(0);
+
+/* sequential container identifier */
+static atomic_t contid = ATOMIC_INIT(0);
+
+/* Generation id for each enumeration invocation. */
+static atomic_t enumeration_count = ATOMIC_INIT(0);
+
+/*
+ * Provenance record read from the XATTR_SECURITY_CSM xattr; together with
+ * the machine id it is turned into a process uuid.
+ */
+struct file_provenance {
+	/* pid of the process doing the first write. */
+	pid_t tgid;
+	/* start_time of the process to uniquely identify it. */
+	u64 start_time;
+};
+
+/* Work item carrying the generation id of one enumeration request. */
+struct csm_enumerate_processes_work_data {
+	struct work_struct work;
+	int enumeration_count;
+};
+
+/*
+ * Make the argument stack of bprm readable from the kernel.
+ *
+ * With a single stack page, the page holding bprm->p is pinned and mapped
+ * with kmap(); *ctx receives the page so it can be released later.
+ * With several pages, each page is copied into one vmalloc() buffer,
+ * starting at the page that contains bprm->p; *ctx is set to bprm.
+ *
+ * Returns the mapping/buffer, or NULL on failure. Release it with
+ * kunmap_argument_stack().
+ */
+static void *kmap_argument_stack(struct linux_binprm *bprm, void **ctx)
+{
+	char *argv;
+	int err;
+	unsigned long i, pos, count;
+	void *map;
+	struct page *page;
+
+	/* vma_pages() returns the number of pages reserved for the stack */
+	count = vma_pages(bprm->vma);
+
+	if (likely(count == 1)) {
+		err = get_user_pages_remote(current, bprm->mm, bprm->p, 1,
+					    FOLL_FORCE, &page, NULL, NULL);
+		if (err != 1)
+			return NULL;
+
+		/* The page stays pinned and mapped until the unmap helper. */
+		argv = kmap(page);
+		*ctx = page;
+	} else {
+		/*
+		 * If more than one pages is needed, copy all of them to a set
+		 * of pages. Parsing the argument across kmap pages in different
+		 * addresses would make it impractical.
+		 */
+		argv = vmalloc(count * PAGE_SIZE);
+		if (!argv)
+			return NULL;
+
+		for (i = 0; i < count; i++) {
+			pos = ALIGN_DOWN(bprm->p, PAGE_SIZE) + i * PAGE_SIZE;
+			err = get_user_pages_remote(current, bprm->mm, pos, 1,
+						    FOLL_FORCE, &page, NULL,
+						    NULL);
+			if (err <= 0) {
+				vfree(argv);
+				return NULL;
+			}
+
+			/* Copy, then drop the mapping and the pin right away. */
+			map = kmap(page);
+			memcpy(argv + i * PAGE_SIZE, map, PAGE_SIZE);
+			kunmap(page);
+			put_page(page);
+		}
+		*ctx = bprm;
+	}
+
+	return argv;
+}
+
+/*
+ * Release what kmap_argument_stack() returned in addr/ctx: unmap and unpin
+ * the page for a single-page stack, free the copy otherwise.
+ * A NULL addr is a no-op.
+ */
+static void kunmap_argument_stack(struct linux_binprm *bprm, void *addr,
+				  void *ctx)
+{
+	if (addr == NULL)
+		return;
+
+	/* Several pages: addr is the vmalloc() copy. */
+	if (unlikely(vma_pages(bprm->vma) != 1)) {
+		vfree(addr);
+		return;
+	}
+
+	/* Single page: ctx is the pinned page itself. */
+	kunmap((struct page *)ctx);
+	put_page(ctx);
+}
+
+/*
+ * Return the string starting at array[*offset] if it is NUL-terminated
+ * before end, and advance *offset just past its terminator.
+ * Returns NULL, leaving *offset unchanged, when *offset is out of bounds or
+ * no terminator is found before end.
+ */
+static char *find_array_next_entry(char *array, unsigned long *offset,
+				   unsigned long end)
+{
+	unsigned long start = *offset;
+	unsigned long cur;
+
+	for (cur = start; cur < end; cur++) {
+		if (array[cur] == '\0') {
+			/* Pass the null byte for the next iteration */
+			*offset = cur + 1;
+			return array + start;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Context handed to the argv/envp encode callbacks: the binprm being
+ * executed and the kernel-readable view of its argument stack.
+ */
+struct string_arr_ctx {
+	struct linux_binprm *bprm;
+	void *stack;
+};
+
+/*
+ * Read a size limit from the execute configuration.
+ * Must be called with csm_rwsem_config held for reading.
+ * Returns 0 when the execute collector is disabled: arguments need not be
+ * captured since the event proto won't be sent anyway.
+ */
+static size_t get_config_limit(size_t *config_ptr)
+{
+	lockdep_assert_held_read(&csm_rwsem_config);
+
+	return csm_execute_enabled ? *config_ptr : 0;
+}
+
+/*
+ * Encode callback for the argv field of an execute event.
+ * *arg is a struct string_arr_ctx. Up to bprm->argc strings are read from
+ * the mapped stack, starting at the in-page offset of bprm->p, and encoded
+ * until csm_execute_config.argv_limit bytes have been used.
+ * Returns false if an argument is not terminated within the stack or if
+ * encoding fails; returns true without encoding when the limit is 0.
+ */
+static bool encode_current_argv(pb_ostream_t *stream, const pb_field_t *field,
+				void * const *arg)
+{
+	struct string_arr_ctx *ctx = (struct string_arr_ctx *)*arg;
+	int i;
+	struct linux_binprm *bprm = ctx->bprm;
+	unsigned long offset = bprm->p % PAGE_SIZE;
+	unsigned long end = vma_pages(bprm->vma) * PAGE_SIZE;
+	char *argv = ctx->stack;
+	char *entry;
+	size_t limit, used = 0;
+	ssize_t ret;
+
+	limit = get_config_limit(&csm_execute_config.argv_limit);
+	if (!limit)
+		return true;
+
+	for (i = 0; i < bprm->argc; i++) {
+		entry = find_array_next_entry(argv, &offset, end);
+		if (!entry)
+			return false;
+
+		/* Only the remaining budget may be spent on this entry. */
+		ret = pb_encode_string_field_limit(stream, field,
+						   (void * const *)&entry,
+						   limit - used);
+		if (ret < 0)
+			return false;
+
+		used += ret;
+
+		if (used >= limit)
+			break;
+	}
+
+	return true;
+}
+
+/*
+ * Decide whether the environment entry envp ("KEY=value", or a bare key)
+ * may be reported.
+ * Nothing is allowed while the execute collector is disabled and everything
+ * is allowed when no allowlist is configured. Otherwise the key must equal
+ * one entry of the allowlist, a sequence of NUL-terminated strings ended by
+ * an empty string.
+ */
+static bool check_envp_allowlist(char *envp)
+{
+	char *filter, *sep;
+	size_t filter_len, key_len;
+
+	/* If execute is not enabled, skip all. */
+	if (!csm_execute_enabled)
+		return false;
+
+	/* No filter, allow all. */
+	filter = csm_execute_config.envp_allowlist;
+	if (filter == NULL)
+		return true;
+
+	/*
+	 * The key ends at the first '='; without one the whole string is
+	 * the key.
+	 */
+	sep = strchr(envp, '=');
+	key_len = sep ? (sep - envp) : strlen(envp);
+
+	do {
+		filter_len = strlen(filter);
+
+		/* Same length and same bytes: the key is allowed. */
+		if (filter_len == key_len &&
+		    strncmp(filter, envp, filter_len) == 0)
+			return true;
+
+		filter += filter_len + 1;
+	} while (*filter != 0);
+
+	/* Default to skip if no match found. */
+	return false;
+}
+
+/*
+ * Encode callback for the envp field of an execute event.
+ * *arg is a struct string_arr_ctx. The bprm->argc argument strings are
+ * skipped first, then up to bprm->envc environment strings are read; those
+ * accepted by check_envp_allowlist() are encoded until
+ * csm_execute_config.envp_limit bytes have been used.
+ * Returns false if a string is not terminated within the stack or if
+ * encoding fails; returns true without encoding when the limit is 0.
+ */
+static bool encode_current_envp(pb_ostream_t *stream, const pb_field_t *field,
+				void * const *arg)
+{
+	struct string_arr_ctx *ctx = (struct string_arr_ctx *)*arg;
+	int i;
+	struct linux_binprm *bprm = ctx->bprm;
+	unsigned long offset = bprm->p % PAGE_SIZE;
+	unsigned long end = vma_pages(bprm->vma) * PAGE_SIZE;
+	char *argv = ctx->stack;
+	char *entry;
+	size_t limit, used = 0;
+	ssize_t ret;
+
+	limit = get_config_limit(&csm_execute_config.envp_limit);
+	if (!limit)
+		return true;
+
+	/* Skip arguments */
+	for (i = 0; i < bprm->argc; i++) {
+		if (!find_array_next_entry(argv, &offset, end))
+			return false;
+	}
+
+	for (i = 0; i < bprm->envc; i++) {
+		entry = find_array_next_entry(argv, &offset, end);
+		if (!entry)
+			return false;
+
+		/* Filtered-out variables do not consume any budget. */
+		if (!check_envp_allowlist(entry))
+			continue;
+
+		ret = pb_encode_string_field_limit(stream, field,
+						   (void * const *)&entry,
+						   limit - used);
+		if (ret < 0)
+			return false;
+
+		used += ret;
+
+		if (used >= limit)
+			break;
+	}
+
+	return true;
+}
+
+/*
+ * Tell whether file lives on a mount whose superblock carries the overlayfs
+ * magic. A missing mount or superblock counts as "no".
+ */
+static bool is_overlayfs_mounted(struct file *file)
+{
+	struct vfsmount *mount = file->f_path.mnt;
+	struct super_block *sb;
+
+	if (!mount)
+		return false;
+
+	sb = mount->mnt_sb;
+
+	return sb != NULL && sb->s_magic == OVERLAYFS_SUPER_MAGIC;
+}
+
+/*
+ * Heuristic used before the process starts: a task is possibly containerized
+ * when it runs outside the initial pid namespace and the target file sits on
+ * an overlayfs mount. This holds for COS and GKE but not for every existing
+ * container runtime.
+ */
+static bool is_possible_container(struct task_struct *task,
+				  struct file *file)
+{
+	return task_active_pid_ns(task) != &init_pid_ns &&
+	       is_overlayfs_mounted(file);
+}
+
+/*
+ * Generates a random identifier for this boot instance.
+ * This identifier is generated only when needed to increase the entropy
+ * available compared to doing it at early boot.
+ *
+ * The value is never 0, since 0 marks "not generated yet" in machine_rand.
+ * Concurrent first callers agree on the value installed by the first
+ * successful cmpxchg.
+ */
+static u32 get_machine_id(void)
+{
+	int machineid, old;
+
+	machineid = atomic_read(&machine_rand);
+
+	if (unlikely(machineid == 0)) {
+		machineid = (int)get_random_int();
+		/* Keep 0 reserved for the "unset" state. */
+		if (machineid == 0)
+			machineid = 1;
+		old = atomic_cmpxchg(&machine_rand, 0, machineid);
+
+		/* If someone beat us, use their value. */
+		if (old != 0)
+			machineid = old;
+	}
+
+	return (u32)machineid;
+}
+
+/*
+ * Generate a 128-bit unique identifier for the process by appending:
+ *  - A machine identifier unique per boot.
+ *  - The start time of the process in nanoseconds.
+ *  - The tgid for the set of threads in a process.
+ *
+ * buffer is cleared over size bytes first. Returns 0 on success, or -EINVAL
+ * (with a warning) when size is smaller than PROCESS_UUID_SIZE.
+ */
+static int get_process_uuid(struct task_struct *task, char *buffer, size_t size)
+{
+	union process_uuid *id = (union process_uuid *)buffer;
+
+	memset(buffer, 0, size);
+
+	if (WARN_ON(size < PROCESS_UUID_SIZE))
+		return -EINVAL;
+
+	id->machineid = get_machine_id();
+	/* The group leader's start time is used, converted to real time. */
+	id->start_time = ktime_mono_to_real(task->group_leader->start_time);
+	id->tgid = task_tgid_nr(task);
+
+	return 0;
+}
+
+/*
+ * Look up pid_nr in the initial pid namespace and write the uuid of that
+ * task into buffer. The lookup and the uuid generation both run inside one
+ * RCU read-side section.
+ * Returns -ENOENT when no such task exists, else get_process_uuid()'s result.
+ */
+int get_process_uuid_by_pid(pid_t pid_nr, char *buffer, size_t size)
+{
+	struct task_struct *target;
+	int ret = -ENOENT;
+
+	rcu_read_lock();
+	target = find_task_by_pid_ns(pid_nr, &init_pid_ns);
+	if (target)
+		ret = get_process_uuid(target, buffer, size);
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/*
+ * Build a process uuid from the provenance record stored in the
+ * XATTR_SECURITY_CSM xattr of the overlayfs upper dentry of file.
+ *
+ * buffer is cleared over size bytes first. Returns 0 on success, -EINVAL
+ * (with a warning) when size is smaller than PROCESS_UUID_SIZE, -ENODATA
+ * when the file is not on overlayfs, has no upper dentry or carries an
+ * xattr of unexpected size, or the negative error of __vfs_getxattr().
+ */
+static int get_process_uuid_from_xattr(struct file *file, char *buffer,
+				       size_t size)
+{
+	struct dentry *dentry;
+	int err;
+	struct file_provenance prov;
+	union process_uuid *id = (union process_uuid *)buffer;
+
+	memset(buffer, 0, size);
+
+	if (WARN_ON(size < PROCESS_UUID_SIZE))
+		return -EINVAL;
+
+	/* The file is part of overlayfs on the upper layer. */
+	if (!is_overlayfs_mounted(file))
+		return -ENODATA;
+
+	dentry = ovl_dentry_upper(file->f_path.dentry);
+	if (!dentry)
+		return -ENODATA;
+
+	err = __vfs_getxattr(dentry, dentry->d_inode,
+			     XATTR_SECURITY_CSM, &prov, sizeof(prov));
+	/* returns -ENODATA if the xattr does not exist. */
+	if (err < 0)
+		return err;
+	if (err != sizeof(prov)) {
+		/* Report the expected xattr size, not the uuid buffer size. */
+		pr_err("unexpected size for xattr: %zu -> %d\n",
+		       sizeof(prov), err);
+		return -ENODATA;
+	}
+
+	id->machineid = get_machine_id();
+	id->start_time = prov.start_time;
+	id->tgid = prov.tgid;
+	return 0;
+}
+
+/*
+ * Assign the next sequential container id to task (stored in its audit
+ * context) and return it.
+ * Returns AUDIT_CID_UNSET, with a warning, when the task has no audit
+ * context or no active pid namespace.
+ */
+u64 csm_set_contid(struct task_struct *task)
+{
+	u64 cid;
+	struct pid_namespace *ns;
+
+	ns = task_active_pid_ns(task);
+	if (WARN_ON(!task->audit) || WARN_ON(!ns))
+		return AUDIT_CID_UNSET;
+
+	cid = atomic_inc_return(&contid);
+	task->audit->contid = cid;
+
+	/*
+	 * If the namespace container-id is not set, use the one assigned
+	 * to the first process created.
+	 */
+	cmpxchg(&ns->cid, 0, cid);
+	return cid;
+}
+
+/*
+ * Return the container id recorded on pid namespace ns, or AUDIT_CID_UNSET
+ * when ns is NULL or has no id yet.
+ */
+u64 csm_get_ns_contid(struct pid_namespace *ns)
+{
+	if (ns && ns->cid)
+		return ns->cid;
+
+	return AUDIT_CID_UNSET;
+}
+
+/* Storage for one socket address, either IPv4 or IPv6. */
+union ip_data {
+	struct in_addr ip4;
+	struct in6_addr ip6;
+};
+
+/*
+ * Backing storage for the pointers placed in a schema_File while an event
+ * is being built; it must stay alive until the event has been encoded.
+ */
+struct file_data {
+	/* Page holding the resolved path, released by free_file_data(). */
+	void *allocated;
+	/* Local and remote addresses for socket files. */
+	union ip_data local;
+	union ip_data remote;
+	/* uuid derived from the overlayfs provenance xattr, if any. */
+	char modified_uuid[PROCESS_UUID_SIZE];
+};
+
+/*
+ * Release the page owned by fdata, if any, and forget it so the function
+ * can safely be called again on the same fdata.
+ */
+static void free_file_data(struct file_data *fdata)
+{
+	unsigned long page = (unsigned long)fdata->allocated;
+
+	fdata->allocated = NULL;
+	free_page(page);
+}
+
+/*
+ * Describe socket address saddr in schema_socketip.
+ * The family is always recorded. For AF_INET and AF_INET6 the port is
+ * stored in host order and the address is copied into idata, which backs
+ * the encode callback; other families get no port or address.
+ */
+static void fill_socket_description(struct sockaddr_storage *saddr,
+				   union ip_data *idata,
+				   schema_SocketIp *schema_socketip)
+{
+	schema_socketip->family = saddr->ss_family;
+
+	if (saddr->ss_family == AF_INET) {
+		struct sockaddr_in *v4 = (struct sockaddr_in *)saddr;
+
+		schema_socketip->port = ntohs(v4->sin_port);
+		idata->ip4 = v4->sin_addr;
+		schema_socketip->ip.funcs.encode = pb_encode_ip4;
+		schema_socketip->ip.arg = &idata->ip4;
+	} else if (saddr->ss_family == AF_INET6) {
+		struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)saddr;
+
+		schema_socketip->port = ntohs(v6->sin6_port);
+		idata->ip6 = v6->sin6_addr;
+		schema_socketip->ip.funcs.encode = pb_encode_ip6;
+		schema_socketip->ip.arg = &idata->ip6;
+	}
+}
+
+/*
+ * Fill the overlayfs part of schema_file when file is on an overlayfs mount:
+ * which layers back the dentry and, if the provenance xattr is present, the
+ * uuid of the process that modified the file (stored in fdata).
+ * Returns 0 on success or when there is nothing to fill, or the negative
+ * error of get_process_uuid_from_xattr() other than -ENODATA.
+ */
+static int fill_file_overlayfs(struct file *file, schema_File *schema_file,
+			       struct file_data *fdata)
+{
+	struct dentry *dentry;
+	int err;
+	schema_Overlay *overlayfs;
+
+	/* If not an overlayfs superblock, done. */
+	if (!is_overlayfs_mounted(file))
+		return 0;
+
+	dentry = file->f_path.dentry;
+	schema_file->which_filesystem = schema_File_overlayfs_tag;
+	overlayfs = &schema_file->filesystem.overlayfs;
+	/* The dentry pointers are stored as "layer present" indicators. */
+	overlayfs->lower_layer = ovl_dentry_lower(dentry);
+	overlayfs->upper_layer = ovl_dentry_upper(dentry);
+
+	err = get_process_uuid_from_xattr(file, fdata->modified_uuid,
+					  sizeof(fdata->modified_uuid));
+	/* If there is no xattr, just skip the modified_uuid field. */
+	if (err == -ENODATA)
+		return 0;
+	if (err < 0)
+		return err;
+
+	overlayfs->modified_uuid.funcs.encode = pb_encode_uuid_field;
+	overlayfs->modified_uuid.arg = fdata->modified_uuid;
+	return 0;
+}
+
+/*
+ * Describe file in schema_file, using fdata as backing storage.
+ *
+ * fdata is always cleared first. A NULL file is accepted and leaves
+ * schema_file untouched. Pipes only get their inode number; sockets get
+ * their local/remote addresses; every other file gets its full path and,
+ * where applicable, its overlayfs details.
+ *
+ * Returns 0 on success or a negative errno. On success the caller must
+ * release fdata with free_file_data() once the event has been encoded; on
+ * failure the page has already been released here.
+ */
+static int fill_file_description(struct file *file, schema_File *schema_file,
+				 struct file_data *fdata)
+{
+	char *buf;
+	int err;
+	u32 mode;
+	char *path;
+	struct socket *socket;
+	schema_Socket *socketfs;
+	struct sockaddr_storage saddr;
+
+	memset(fdata, 0, sizeof(*fdata));
+
+	if (file == NULL)
+		return 0;
+
+	schema_file->ino = file_inode(file)->i_ino;
+	mode = file_inode(file)->i_mode;
+
+	/* For pipes, no need to resolve the path. */
+	if (S_ISFIFO(mode))
+		return 0;
+
+	if (S_ISSOCK(mode)) {
+		socket = (struct socket *)file->private_data;
+		socketfs = &schema_file->filesystem.socket;
+
+		/* Local socket */
+		err = kernel_getsockname(socket, (struct sockaddr *)&saddr);
+		if (err >= 0) {
+			socketfs->has_local = true;
+			fill_socket_description(&saddr, &fdata->local,
+						&socketfs->local);
+		}
+
+		/* Remote socket, might not be connected. */
+		err = kernel_getpeername(socket, (struct sockaddr *)&saddr);
+		if (err >= 0) {
+			socketfs->has_remote = true;
+			fill_socket_description(&saddr, &fdata->remote,
+						&socketfs->remote);
+		}
+
+		/* Address lookup failures are not reported to the caller. */
+		schema_file->which_filesystem = schema_File_socket_tag;
+		return 0;
+	}
+
+	/*
+	 * From this point, we care about all the other types of files as their
+	 * path provides interesting insight.
+	 */
+	buf = (char *)__get_free_page(GFP_KERNEL);
+	if (buf == NULL)
+		return -ENOMEM;
+
+	fdata->allocated = buf;
+
+	/* path points inside buf, not necessarily at its start. */
+	path = d_path(&file->f_path, buf, PAGE_SIZE);
+	if (IS_ERR(path)) {
+		free_file_data(fdata);
+		return PTR_ERR(path);
+	}
+
+	schema_file->fullpath.funcs.encode = pb_encode_string_field;
+	schema_file->fullpath.arg = path; /* buf is freed in free_file_data. */
+
+	err = fill_file_overlayfs(file, schema_file, fdata);
+	if (err) {
+		free_file_data(fdata);
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Describe file descriptor fd of the current task in desc.
+ * When fd is not open, fdata is cleared and desc is left untouched.
+ * Returns 0 or the error of fill_file_description().
+ */
+static int fill_stream_description(schema_Descriptor *desc, int fd,
+				   struct file_data *fdata)
+{
+	struct fd sfd = fdget(fd);
+	int ret = 0;
+
+	if (sfd.file != NULL) {
+		desc->mode = file_inode(sfd.file)->i_mode;
+		desc->has_file = true;
+		ret = fill_file_description(sfd.file, &desc->file, fdata);
+	} else {
+		/* Keep fdata in a state free_file_data() can handle. */
+		memset(fdata, 0, sizeof(*fdata));
+	}
+
+	fdput(sfd);
+	return ret;
+}
+
+/*
+ * Set the uuid and, when available, the parent uuid of proc for task.
+ * uuid and parent_uuid are caller-provided buffers that back the encode
+ * callbacks, so they must outlive the encoding of proc.
+ *
+ * The parent uuid is skipped when task is no longer alive or has no
+ * real_parent. Returns 0, or the error of get_process_uuid().
+ */
+static int populate_proc_uuid_common(schema_Process *proc, char *uuid,
+				     size_t uuid_size, char *parent_uuid,
+				     size_t parent_uuid_size,
+				     struct task_struct *task)
+{
+	int err;
+	struct task_struct *parent;
+	/* Generate unique identifier for the process and its parent */
+	err = get_process_uuid(task, uuid, uuid_size);
+	if (err)
+		return err;
+
+	proc->uuid.funcs.encode = pb_encode_uuid_field;
+	proc->uuid.arg = uuid;
+
+	rcu_read_lock();
+
+	/* err is 0 here, so a dead task is not reported as a failure. */
+	if (!pid_alive(task))
+		goto out;
+	/*
+	 * I don't think this needs to be task_rcu_dereference because
+	 * real_parent is only supposed to be accessed using RCU.
+	 */
+	parent = rcu_dereference(task->real_parent);
+
+	if (parent) {
+		err = get_process_uuid(parent, parent_uuid, parent_uuid_size);
+		if (!err) {
+			proc->parent_uuid.funcs.encode = pb_encode_uuid_field;
+			proc->parent_uuid.arg = parent_uuid;
+		}
+	}
+
+out:
+	rcu_read_unlock();
+
+	return err;
+}
+
+/*
+ * Populate the fields that we always want to set in Process messages:
+ * container id, exec session id, pids seen from the initial and from the
+ * task's own pid namespace, and the process/parent uuids.
+ * Returns the result of populate_proc_uuid_common().
+ */
+static int populate_proc_common(schema_Process *proc, char *uuid,
+				size_t uuid_size, char *parent_uuid,
+				size_t parent_uuid_size,
+				struct task_struct *task)
+{
+	u64 cid;
+	struct pid_namespace *ns = task_active_pid_ns(task);
+
+	/* Container identifier for the current namespace. */
+	proc->container_id = csm_get_ns_contid(ns);
+
+	/*
+	 * If the process container-id is different, the process tree is part of
+	 * a different session within the namespace (kubectl/docker exec,
+	 * liveness probe or others).
+	 */
+	cid = audit_get_contid(task);
+	if (proc->container_id != cid)
+		proc->exec_session_id = cid;
+
+	/* Add information about pid in different namespaces */
+	proc->pid = task_tgid_nr(task);
+	proc->parent_pid = task_ppid_nr(task);
+	proc->container_pid = task_tgid_nr_ns(task, ns);
+	proc->container_parent_pid = task_ppid_nr_ns(task, ns);
+
+	return populate_proc_uuid_common(proc, uuid, uuid_size, parent_uuid,
+					 parent_uuid_size, task);
+}
+
+/*
+ * Exec hook: tag containerized processes with a container id and, when the
+ * execute collector is enabled, send an execute event describing the
+ * binary, the standard streams, argv and envp.
+ *
+ * Returns 0 in every case except when gathering the event data fails while
+ * the execute collector is still enabled; then the negative errno is
+ * returned. A failure to send the event is only logged.
+ */
+int csm_bprm_check_security(struct linux_binprm *bprm)
+{
+	char uuid[PROCESS_UUID_SIZE];
+	char parent_uuid[PROCESS_UUID_SIZE];
+	int err;
+	schema_Event event = {};
+	schema_Process *proc;
+	struct string_arr_ctx argv_ctx;
+	void *stack = NULL, *ctx = NULL;
+	u64 cid;
+	struct file_data path_data = {};
+	struct file_data stdin_data = {};
+	struct file_data stdout_data = {};
+	struct file_data stderr_data = {};
+
+	/*
+	 * Always create a container-id for containerized processes.
+	 * If the LSM is enabled later, we can track existing containers.
+	 */
+	cid = audit_get_contid(current);
+
+	if (cid == AUDIT_CID_UNSET) {
+		if (!is_possible_container(current, bprm->file))
+			return 0;
+
+		cid = csm_set_contid(current);
+
+		if (cid == AUDIT_CID_UNSET)
+			return 0;
+	}
+
+	if (!csm_execute_enabled)
+		return 0;
+
+	/* The interpreter will call us again with more context. */
+	if (bprm->buf[0] == '#' && bprm->buf[1] == '!')
+		return 0;
+
+	proc = &event.event.execute.proc;
+	err = populate_proc_common(proc, uuid, sizeof(uuid), parent_uuid,
+				   sizeof(parent_uuid), current);
+	if (err)
+		goto out_free_buf;
+
+	proc->creation_timestamp = ktime_get_real_ns();
+
+	/* Provide information about the launched binary. */
+	proc->has_binary = true;
+	err = fill_file_description(bprm->file, &proc->binary, &path_data);
+	if (err)
+		goto out_free_buf;
+
+	/* Information about streams */
+	proc->has_streams = true;
+
+	proc->streams.has_stdin = true;
+	err = fill_stream_description(&proc->streams.stdin, STDIN_FILENO,
+				      &stdin_data);
+	if (err)
+		goto out_free_buf;
+
+	proc->streams.has_stdout = true;
+	err = fill_stream_description(&proc->streams.stdout, STDOUT_FILENO,
+				      &stdout_data);
+	if (err)
+		goto out_free_buf;
+
+	proc->streams.has_stderr = true;
+	err = fill_stream_description(&proc->streams.stderr, STDERR_FILENO,
+				      &stderr_data);
+	if (err)
+		goto out_free_buf;
+
+	stack = kmap_argument_stack(bprm, &ctx);
+	if (!stack) {
+		err = -EFAULT;
+		goto out_free_buf;
+	}
+
+	/* Capture process argument */
+	argv_ctx.bprm = bprm;
+	argv_ctx.stack = stack;
+	proc->args.argv.funcs.encode = encode_current_argv;
+	proc->args.argv.arg = &argv_ctx;
+
+	/* Capture process environment variables */
+	proc->args.envp.funcs.encode = encode_current_envp;
+	proc->args.envp.arg = &argv_ctx;
+
+	event.which_event = schema_Event_execute_tag;
+	event.event.execute.has_proc = true;
+	proc->has_args = true;
+
+	/*
+	 * Configurations options are checked when computing the serialized
+	 * protobufs.
+	 */
+	down_read(&csm_rwsem_config);
+	err = csm_sendeventproto(schema_Event_fields, &event);
+	up_read(&csm_rwsem_config);
+
+	/* A send failure never blocks the exec. */
+	if (err)
+		pr_err("csm_sendeventproto returned %d on execve\n", err);
+	err = 0;
+
+out_free_buf:
+	/* Every release below tolerates a resource that was never acquired. */
+	kunmap_argument_stack(bprm, stack, ctx);
+	free_file_data(&path_data);
+	free_file_data(&stdin_data);
+	free_file_data(&stdout_data);
+	free_file_data(&stderr_data);
+
+	/*
+	 * On failure, enforce it only if the execute config is enabled.
+	 * If the collector was disabled, prefer to succeed to not impact the
+	 * system.
+	 */
+	if (unlikely(err < 0 && !csm_execute_enabled))
+		err = 0;
+
+	return err;
+}
+
+/*
+ * Create a clone event when a new task leader is created.
+ *
+ * Nothing is sent when the execute collector is disabled, when the task has
+ * no container id, or when the task is not a thread group leader. Failures
+ * are logged; the hook itself cannot fail.
+ */
+void csm_task_post_alloc(struct task_struct *task)
+{
+	int err;
+	char uuid[PROCESS_UUID_SIZE];
+	char parent_uuid[PROCESS_UUID_SIZE];
+	schema_Event event = {};
+	schema_Process *proc;
+
+	if (!csm_execute_enabled ||
+	    audit_get_contid(task) == AUDIT_CID_UNSET ||
+	    !thread_group_leader(task))
+		return;
+
+	proc = &event.event.clone.proc;
+
+	/* Do not send an event whose identifiers could not be generated. */
+	err = populate_proc_uuid_common(proc, uuid, sizeof(uuid), parent_uuid,
+					sizeof(parent_uuid), task);
+	if (err) {
+		pr_err("failed to get process uuid on clone\n");
+		return;
+	}
+
+	event.which_event = schema_Event_clone_tag;
+	event.event.clone.has_proc = true;
+	err = csm_sendeventproto(schema_Event_fields, &event);
+	if (err)
+		pr_err("csm_sendeventproto returned %d on clone\n", err);
+}
+
+/*
+ * This LSM hook callback doesn't exist upstream and is called only when the
+ * last thread of a thread group exit.
+ *
+ * Sends an exit event carrying the process uuid. Nothing is sent when the
+ * execute collector is disabled or the task has no container id. Failures
+ * are only logged.
+ */
+void csm_task_exit(struct task_struct *task)
+{
+	int err;
+	schema_Event event = {};
+	schema_ExitEvent *exit;
+	char uuid[PROCESS_UUID_SIZE];
+
+	if (!csm_execute_enabled ||
+	    audit_get_contid(task) == AUDIT_CID_UNSET)
+		return;
+
+	exit = &event.event.exit;
+
+	/* Fetch the unique identifier for this process */
+	err = get_process_uuid(task, uuid, sizeof(uuid));
+	if (err) {
+		pr_err("failed to get process uuid on exit\n");
+		return;
+	}
+
+	/* uuid lives on this stack frame until the event has been encoded. */
+	exit->process_uuid.funcs.encode = pb_encode_uuid_field;
+	exit->process_uuid.arg = uuid;
+
+	event.which_event = schema_Event_exit_tag;
+
+	err = csm_sendeventproto(schema_Event_fields, &event);
+	if (err)
+		pr_err("csm_sendeventproto returned %d on exit\n", err);
+}
+
+/*
+ * mprotect hook: report a file-backed mapping that is made executable by a
+ * containerized process.
+ *
+ * Nothing is done (and 0 is returned) when the memexec collector is
+ * disabled, PROT_EXEC is not requested, the vma has no file or the current
+ * task has no container id.
+ *
+ * Returns 0, or a negative errno when gathering the event data fails while
+ * the memexec collector is still enabled. A failure to send the event is
+ * only logged.
+ */
+int csm_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
+		unsigned long prot)
+{
+	char uuid[PROCESS_UUID_SIZE];
+	char parent_uuid[PROCESS_UUID_SIZE];
+	int err;
+	schema_Event event = {};
+	schema_MemoryExecEvent *memexec;
+	u64 cid;
+	struct file_data path_data = {};
+
+	cid = audit_get_contid(current);
+
+	if (!csm_memexec_enabled ||
+	    !(prot & PROT_EXEC) ||
+	    vma->vm_file == NULL ||
+	    cid == AUDIT_CID_UNSET)
+		return 0;
+
+	memexec = &event.event.memexec;
+
+	/* On failure path_data is already released; freeing again is safe. */
+	err = fill_file_description(vma->vm_file, &memexec->mapped_file,
+				    &path_data);
+	if (err)
+		goto out;
+
+	err = populate_proc_common(&memexec->proc, uuid, sizeof(uuid),
+				   parent_uuid, sizeof(parent_uuid), current);
+	if (err)
+		goto out;
+
+	memexec->prot_exec_timestamp = ktime_get_real_ns();
+	memexec->new_flags = prot;
+	memexec->req_flags = reqprot;
+	memexec->old_vm_flags = vma->vm_flags;
+
+	memexec->action = schema_MemoryExecEvent_Action_MPROTECT;
+	memexec->start_addr = vma->vm_start;
+	memexec->end_addr = vma->vm_end;
+
+	event.which_event = schema_Event_memexec_tag;
+	event.event.memexec.has_proc = true;
+	event.event.memexec.has_mapped_file = true;
+
+	/* A send failure never blocks the mprotect. */
+	err = csm_sendeventproto(schema_Event_fields, &event);
+	if (err)
+		pr_err("csm_sendeventproto returned %d on mprotect\n", err);
+	err = 0;
+
+out:
+	free_file_data(&path_data);
+
+	/*
+	 * On failure, enforce it only if the memexec config is enabled.
+	 * If the collector was disabled, prefer to succeed to not impact the
+	 * system.
+	 */
+	if (unlikely(err < 0 && !csm_memexec_enabled))
+		err = 0;
+
+	return err;
+}
+
+/*
+ * csm_mmap_file - report an mmap() of a file with PROT_EXEC.
+ * @file:    the file being mapped; anonymous mappings (NULL) are not reported
+ * @reqprot: protection requested by the caller, recorded as req_flags
+ * @prot:    protection recorded as new_flags; the event is only generated
+ *           when it contains PROT_EXEC
+ * @flags:   mmap flags, recorded as mmap_flags
+ *
+ * The event is skipped (and 0 returned) when the memexec collector is
+ * disabled or the current task has no audit container id. is_initial_mmap is
+ * set when @file has the same path as the current process' executable.
+ *
+ * Return: 0 when the event is skipped or after the send was attempted (a
+ * send failure is only logged); otherwise the error returned by
+ * fill_file_description() or populate_proc_common().
+ */
+int csm_mmap_file(struct file *file, unsigned long reqprot,
+		unsigned long prot, unsigned long flags)
+{
+	char uuid[PROCESS_UUID_SIZE];
+	char parent_uuid[PROCESS_UUID_SIZE];
+	int err;
+	schema_Event event = {};
+	schema_MemoryExecEvent *memexec;
+	struct file *exe_file;
+	u64 cid;
+	struct file_data path_data = {};
+
+	cid = audit_get_contid(current);
+
+	if (!csm_memexec_enabled ||
+	    !(prot & PROT_EXEC) ||
+	    file == NULL ||
+	    cid == AUDIT_CID_UNSET)
+		return 0;
+
+	memexec = &event.event.memexec;
+
+	/*
+	 * Release path_data on failure as well, the same way
+	 * delayed_enumerate_processes() does after a failed
+	 * fill_file_description().
+	 */
+	err = fill_file_description(file, &memexec->mapped_file,
+				    &path_data);
+	if (err)
+		goto out;
+
+	err = populate_proc_common(&memexec->proc, uuid, sizeof(uuid),
+				   parent_uuid, sizeof(parent_uuid), current);
+	if (err)
+		goto out;
+
+	/* get_mm_exe_file does its own locking on mm_sem. */
+	exe_file = get_mm_exe_file(current->mm);
+	if (exe_file) {
+		if (path_equal(&file->f_path, &exe_file->f_path))
+			memexec->is_initial_mmap = 1;
+		fput(exe_file);
+	}
+
+	memexec->prot_exec_timestamp = ktime_get_real_ns();
+	memexec->new_flags = prot;
+	memexec->req_flags = reqprot;
+	memexec->mmap_flags = flags;
+	memexec->action = schema_MemoryExecEvent_Action_MMAP_FILE;
+	event.which_event = schema_Event_memexec_tag;
+	memexec->has_proc = true;
+	memexec->has_mapped_file = true;
+
+	err = csm_sendeventproto(schema_Event_fields, &event);
+	if (err)
+		pr_err("csm_sendeventproto returned %d on mmap_file\n", err);
+
+	/* A delivery failure is only logged; it never fails the mmap. */
+	err = 0;
+
+out:
+	free_file_data(&path_data);
+	return err;
+}
+
+/*
+ * csm_file_pre_free - record which process first modified an overlayfs file.
+ * @file: the file about to be freed
+ *
+ * When a containerized process releases a file it had open for writing, and
+ * the file has an upper-layer overlayfs dentry without an XATTR_SECURITY_CSM
+ * attribute yet, store the current thread group id and the group leader's
+ * start time in that attribute. An already present attribute is left alone.
+ * Failures are logged and otherwise ignored.
+ */
+void csm_file_pre_free(struct file *file)
+{
+	struct dentry *dentry;
+	int err;
+	struct file_provenance prov;
+
+	/* The file was opened to be modified and the LSM is enabled */
+	if (!(file->f_mode & FMODE_WRITE) ||
+	    !csm_enabled)
+		return;
+
+	/* The current process is containerized. */
+	if (audit_get_contid(current) == AUDIT_CID_UNSET)
+		return;
+
+	/* The file is part of overlayfs on the upper layer. */
+	if (!is_overlayfs_mounted(file))
+		return;
+
+	dentry = ovl_dentry_upper(file->f_path.dentry);
+	if (!dentry)
+		return;
+
+	/*
+	 * Size-0 query: only -ENODATA (attribute absent) lets us continue.
+	 * Any other result means it already exists or the lookup failed.
+	 */
+	err = __vfs_getxattr(dentry, dentry->d_inode, XATTR_SECURITY_CSM,
+			     NULL, 0);
+	if (err != -ENODATA) {
+		if (err < 0)
+			pr_err("failed to get security attribute: %d\n", err);
+		return;
+	}
+
+	/*
+	 * The raw bytes of prov are persisted below, so clear the whole
+	 * object first: any padding or field not assigned here must not
+	 * carry stale stack contents into the attribute.
+	 */
+	memset(&prov, 0, sizeof(prov));
+	prov.tgid = task_tgid_nr(current);
+	prov.start_time = ktime_mono_to_real(current->group_leader->start_time);
+
+	err = __vfs_setxattr(dentry, dentry->d_inode, XATTR_SECURITY_CSM, &prov,
+			     sizeof(prov), 0);
+	if (err < 0)
+		pr_err("failed to set security attribute: %d\n", err);
+}
+
+/*
+ * Based off of fs/proc/base.c:next_tgid
+ *
+ * next_thread_group_leader scans the initial pid namespace, starting at
+ * *tgid, for the next pid whose task is a thread group leader and has an
+ * audit container id set. On every examined pid, *tgid is advanced to one
+ * past it, so the caller can simply call again to continue the walk.
+ *
+ * Returns the matching task with its reference count raised (the caller must
+ * put_task_struct() it), or NULL when no further pid exists.
+ */
+static struct task_struct *next_thread_group_leader(pid_t *tgid)
+{
+	struct task_struct *leader = NULL;
+	struct pid *candidate;
+
+	cond_resched();
+	rcu_read_lock();
+
+	for (;;) {
+		candidate = find_ge_pid(*tgid, &init_pid_ns);
+		if (!candidate)
+			break;
+
+		*tgid = pid_nr_ns(candidate, &init_pid_ns);
+		leader = pid_task(candidate, PIDTYPE_PID);
+
+		/* Match or not, the next search starts after this pid. */
+		(*tgid) += 1;
+
+		if (leader && has_group_leader_pid(leader) &&
+		    audit_get_contid(leader) != AUDIT_CID_UNSET) {
+			/*
+			 * Pin the task before leaving the RCU read-side
+			 * critical section so it stays valid for the caller.
+			 */
+			get_task_struct(leader);
+			break;
+		}
+
+		leader = NULL;
+	}
+
+	rcu_read_unlock();
+	return leader;
+}
+
+/*
+ * delayed_enumerate_processes - work item that reports already running
+ * processes.
+ * @work: embedded in a struct csm_enumerate_processes_work_data, which this
+ *        function owns and frees
+ *
+ * Walks the thread group leaders returned by next_thread_group_leader() and
+ * sends one EnumerateProcessEvent per process. The walk stops early when a
+ * newer enumeration has been requested (the global enumeration_count no
+ * longer matches the one captured in the work data) or when the execute
+ * collector is disabled. Per-process failures are logged and the walk
+ * continues with the next process.
+ */
+void delayed_enumerate_processes(struct work_struct *work)
+{
+	pid_t tgid = 0;
+	struct task_struct *task;
+	struct csm_enumerate_processes_work_data *wd = container_of(
+		work, struct csm_enumerate_processes_work_data, work);
+	int wd_enumeration_count = wd->enumeration_count;
+
+	/* @work lives inside wd; neither may be touched after this point. */
+	kfree(wd);
+	wd = NULL;
+	work = NULL;
+
+	/*
+	 * Try for only a single enumeration routine at a time, as long as the
+	 * execute collector is enabled.
+	 */
+	while ((wd_enumeration_count == atomic_read(&enumeration_count)) &&
+	       READ_ONCE(csm_execute_enabled) &&
+	       (task = next_thread_group_leader(&tgid))) {
+		int err;
+		char uuid[PROCESS_UUID_SIZE];
+		char parent_uuid[PROCESS_UUID_SIZE];
+		struct file *exe_file = NULL;
+		struct file_data path_data = {};
+		schema_Event event = {};
+		schema_Process *proc = &event.event.enumproc.proc;
+
+		exe_file = get_task_exe_file(task);
+		if (!exe_file) {
+			pr_err("failed to get enumerated process executable, pid: %u\n",
+			       task_pid_nr(task));
+			goto next;
+		}
+
+		proc->has_binary = true;
+		err = fill_file_description(exe_file, &proc->binary,
+					    &path_data);
+		if (err) {
+			pr_err("failed to fill enumerated process %u executable description: %d\n",
+			       task_pid_nr(task), err);
+			goto next;
+		}
+
+		err = populate_proc_common(proc, uuid, sizeof(uuid),
+					   parent_uuid, sizeof(parent_uuid),
+					   task);
+		if (err) {
+			pr_err("failed to set pid %u common fields: %d\n",
+			       task_pid_nr(task), err);
+			goto next;
+		}
+
+		/* Do not report a process that is already on its way out. */
+		if (task->flags & PF_EXITING)
+			goto next;
+
+		/*
+		 * Set has_proc on the union member selected by which_event,
+		 * rather than relying on execute and enumproc sharing a
+		 * layout.
+		 */
+		event.which_event = schema_Event_enumproc_tag;
+		event.event.enumproc.has_proc = true;
+		err = csm_sendeventproto(schema_Event_fields,
+					 &event);
+		if (err) {
+			pr_err("failed to send pid %u enumerated process: %d\n",
+			       task_pid_nr(task), err);
+			goto next;
+		}
+next:
+		free_file_data(&path_data);
+		if (exe_file)
+			fput(exe_file);
+
+		/* Drop the reference taken by next_thread_group_leader(). */
+		put_task_struct(task);
+	}
+}
+
+/*
+ * Schedule an asynchronous enumeration of the running processes.
+ *
+ * Bumps the global enumeration_count and stamps the new value into the work
+ * data handed to delayed_enumerate_processes(), which takes ownership of the
+ * allocation. If the allocation fails, nothing is scheduled and the counter
+ * is left untouched. @config_version is currently unused.
+ */
+void csm_enumerate_processes(unsigned long const config_version)
+{
+	struct csm_enumerate_processes_work_data *request =
+		kmalloc(sizeof(*request), GFP_KERNEL);
+
+	if (request == NULL)
+		return;
+
+	request->enumeration_count = atomic_add_return(1, &enumeration_count);
+	INIT_WORK(&request->work, delayed_enumerate_processes);
+	schedule_work(&request->work);
+}

diff --git a/security/container/process.h b/security/container/process.h
new file mode 100644
index 0000000..1c98134
--- /dev/null
+++ b/security/container/process.h

@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Container Security Monitor module
+ *
+ * Copyright (c) 2019 Google, Inc
+ */
+
+/*
+ * Schedule an asynchronous enumeration of the running processes. The
+ * parameter list matches the definition in process.c.
+ */
+void csm_enumerate_processes(unsigned long const config_version);

diff --git a/security/container/protos/Makefile b/security/container/protos/Makefile
new file mode 100644
index 0000000..a88068b
--- /dev/null
+++ b/security/container/protos/Makefile

@@ -0,0 +1,10 @@
+# Descend into nanopb/ only when the container security monitor is configured.
+subdir-$(CONFIG_SECURITY_CONTAINER_MONITOR) += nanopb
+
+obj-$(CONFIG_SECURITY_CONTAINER_MONITOR) += nanopb/
+obj-$(CONFIG_SECURITY_CONTAINER_MONITOR) += protos.o
+
+# protos.o is a composite object made of the nanopb-generated sources.
+protos-y := config.pb.o event.pb.o
+
+# Header search paths for the generated code and the nanopb runtime.
+# NOTE(review): PB_CCFLAGS is not set in this file; presumably exported by a
+# parent Makefile - confirm.
+ccflags-y := -I$(srctree)/security/container/protos \
+	-I$(srctree)/security/container/protos/nanopb \
+	$(PB_CCFLAGS)

diff --git a/security/container/protos/README b/security/container/protos/README
new file mode 100644
index 0000000..1b0628a
--- /dev/null
+++ b/security/container/protos/README

@@ -0,0 +1,18 @@
+This document provides guidance on how to change the protos used in this directory.
+
+Any change made to a proto file requires reformatting it and regenerating the
+nanopb sources. The proto files must also remain compatible with previously released versions.
+
+To reformat any proto file run: "clang-format -style=Google -i <file.proto>"
+
+To regenerate nanopb files:
+ - Install protoc
+   - apt-get install protobuf-compiler
+ - Clone/setup nanopb for version 0.4.5, the version the checked-in *.pb.c/*.pb.h files were generated with (or clone the internal depot)
+   - git clone --depth=1 https://github.com/nanopb/nanopb.git
+   - cd nanopb
+   - git fetch --tags
+   - git checkout tags/0.4.5
+   - make -C generator/proto
+ - Run protoc with the nanopb definition
+   - protoc --plugin=<path_to_nanopb>/generator/protoc-gen-nanopb --nanopb_out=<path_to_linux>/security/container/protos/ <path_to_linux>/security/container/protos/<file.proto> --proto_path=<path_to_linux>/security/container/protos

diff --git a/security/container/protos/config.pb.c b/security/container/protos/config.pb.c
new file mode 100644
index 0000000..08436ee
--- /dev/null
+++ b/security/container/protos/config.pb.c

@@ -0,0 +1,25 @@
+/* Automatically generated nanopb constant definitions */
+/* Generated by nanopb-0.4.5 */
+
+#include "config.pb.h"
+#if PB_PROTO_HEADER_VERSION != 40
+#error Regenerate this file with the current version of nanopb generator.
+#endif
+
+PB_BIND(schema_ContainerCollectorConfig, schema_ContainerCollectorConfig, AUTO)
+
+
+PB_BIND(schema_ExecuteCollectorConfig, schema_ExecuteCollectorConfig, AUTO)
+
+
+PB_BIND(schema_MemExecCollectorConfig, schema_MemExecCollectorConfig, AUTO)
+
+
+PB_BIND(schema_ConfigurationRequest, schema_ConfigurationRequest, AUTO)
+
+
+PB_BIND(schema_ConfigurationResponse, schema_ConfigurationResponse, AUTO)
+
+
+
+

diff --git a/security/container/protos/config.pb.h b/security/container/protos/config.pb.h
new file mode 100644
index 0000000..893961e
--- /dev/null
+++ b/security/container/protos/config.pb.h

@@ -0,0 +1,157 @@
+/* Automatically generated nanopb header */
+/* Generated by nanopb-0.4.5 */
+
+#ifndef PB_SCHEMA_CONFIG_PB_H_INCLUDED
+#define PB_SCHEMA_CONFIG_PB_H_INCLUDED
+#include <pb.h>
+
+#if PB_PROTO_HEADER_VERSION != 40
+#error Regenerate this file with the current version of nanopb generator.
+#endif
+
+/* Enum definitions */
+typedef enum _schema_ConfigurationResponse_ErrorCode { 
+    schema_ConfigurationResponse_ErrorCode_NO_ERROR = 0, 
+    schema_ConfigurationResponse_ErrorCode_UNKNOWN = 2 
+} schema_ConfigurationResponse_ErrorCode;
+
+/* Struct definitions */
+/* Report success or failure of previous ConfigurationRequest */
+typedef struct _schema_ConfigurationResponse { 
+    schema_ConfigurationResponse_ErrorCode error; 
+    pb_callback_t msg; 
+    uint64_t version; /* Version of the LSM */
+    uint32_t kernel_version; /* LINUX_VERSION_CODE */
+} schema_ConfigurationResponse;
+
+/* Collect information about running containers */
+typedef struct _schema_ContainerCollectorConfig { 
+    bool enabled; 
+} schema_ContainerCollectorConfig;
+
+typedef struct _schema_ExecuteCollectorConfig { 
+    bool enabled; 
+    /* truncate argv/envp if cumulative length exceeds limit */
+    uint32_t argv_limit; 
+    uint32_t envp_limit; 
+    /* If specified, only report the named environment variables.  An
+ empty envp_allowlist indicates that all environment variables
+ should be reported up to a cumulative total of envp_limit bytes. */
+    pb_callback_t envp_allowlist; 
+} schema_ExecuteCollectorConfig;
+
+/* Collect information about executable memory mappings. */
+typedef struct _schema_MemExecCollectorConfig { 
+    bool enabled; 
+} schema_MemExecCollectorConfig;
+
+/* Convey configuration information to Guest LSM */
+typedef struct _schema_ConfigurationRequest { 
+    bool has_container_config;
+    schema_ContainerCollectorConfig container_config; 
+    bool has_execute_config;
+    schema_ExecuteCollectorConfig execute_config; 
+    bool has_memexec_config;
+    schema_MemExecCollectorConfig memexec_config; 
+} schema_ConfigurationRequest;
+
+
+/* Helper constants for enums */
+#define _schema_ConfigurationResponse_ErrorCode_MIN schema_ConfigurationResponse_ErrorCode_NO_ERROR
+#define _schema_ConfigurationResponse_ErrorCode_MAX schema_ConfigurationResponse_ErrorCode_UNKNOWN
+#define _schema_ConfigurationResponse_ErrorCode_ARRAYSIZE ((schema_ConfigurationResponse_ErrorCode)(schema_ConfigurationResponse_ErrorCode_UNKNOWN+1))
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Initializer values for message structs */
+#define schema_ContainerCollectorConfig_init_default {0}
+#define schema_ExecuteCollectorConfig_init_default {0, 0, 0, {{NULL}, NULL}}
+#define schema_MemExecCollectorConfig_init_default {0}
+#define schema_ConfigurationRequest_init_default {false, schema_ContainerCollectorConfig_init_default, false, schema_ExecuteCollectorConfig_init_default, false, schema_MemExecCollectorConfig_init_default}
+#define schema_ConfigurationResponse_init_default {_schema_ConfigurationResponse_ErrorCode_MIN, {{NULL}, NULL}, 0, 0}
+#define schema_ContainerCollectorConfig_init_zero {0}
+#define schema_ExecuteCollectorConfig_init_zero  {0, 0, 0, {{NULL}, NULL}}
+#define schema_MemExecCollectorConfig_init_zero  {0}
+#define schema_ConfigurationRequest_init_zero    {false, schema_ContainerCollectorConfig_init_zero, false, schema_ExecuteCollectorConfig_init_zero, false, schema_MemExecCollectorConfig_init_zero}
+#define schema_ConfigurationResponse_init_zero   {_schema_ConfigurationResponse_ErrorCode_MIN, {{NULL}, NULL}, 0, 0}
+
+/* Field tags (for use in manual encoding/decoding) */
+#define schema_ConfigurationResponse_error_tag   1
+#define schema_ConfigurationResponse_msg_tag     2
+#define schema_ConfigurationResponse_version_tag 3
+#define schema_ConfigurationResponse_kernel_version_tag 4
+#define schema_ContainerCollectorConfig_enabled_tag 1
+#define schema_ExecuteCollectorConfig_enabled_tag 1
+#define schema_ExecuteCollectorConfig_argv_limit_tag 2
+#define schema_ExecuteCollectorConfig_envp_limit_tag 3
+#define schema_ExecuteCollectorConfig_envp_allowlist_tag 4
+#define schema_MemExecCollectorConfig_enabled_tag 1
+#define schema_ConfigurationRequest_container_config_tag 1
+#define schema_ConfigurationRequest_execute_config_tag 2
+#define schema_ConfigurationRequest_memexec_config_tag 3
+
+/* Struct field encoding specification for nanopb */
+#define schema_ContainerCollectorConfig_FIELDLIST(X, a) \
+X(a, STATIC,   SINGULAR, BOOL,     enabled,           1)
+#define schema_ContainerCollectorConfig_CALLBACK NULL
+#define schema_ContainerCollectorConfig_DEFAULT NULL
+
+#define schema_ExecuteCollectorConfig_FIELDLIST(X, a) \
+X(a, STATIC,   SINGULAR, BOOL,     enabled,           1) \
+X(a, STATIC,   SINGULAR, UINT32,   argv_limit,        2) \
+X(a, STATIC,   SINGULAR, UINT32,   envp_limit,        3) \
+X(a, CALLBACK, REPEATED, STRING,   envp_allowlist,    4)
+#define schema_ExecuteCollectorConfig_CALLBACK pb_default_field_callback
+#define schema_ExecuteCollectorConfig_DEFAULT NULL
+
+#define schema_MemExecCollectorConfig_FIELDLIST(X, a) \
+X(a, STATIC,   SINGULAR, BOOL,     enabled,           1)
+#define schema_MemExecCollectorConfig_CALLBACK NULL
+#define schema_MemExecCollectorConfig_DEFAULT NULL
+
+#define schema_ConfigurationRequest_FIELDLIST(X, a) \
+X(a, STATIC,   OPTIONAL, MESSAGE,  container_config,   1) \
+X(a, STATIC,   OPTIONAL, MESSAGE,  execute_config,    2) \
+X(a, STATIC,   OPTIONAL, MESSAGE,  memexec_config,    3)
+#define schema_ConfigurationRequest_CALLBACK NULL
+#define schema_ConfigurationRequest_DEFAULT NULL
+#define schema_ConfigurationRequest_container_config_MSGTYPE schema_ContainerCollectorConfig
+#define schema_ConfigurationRequest_execute_config_MSGTYPE schema_ExecuteCollectorConfig
+#define schema_ConfigurationRequest_memexec_config_MSGTYPE schema_MemExecCollectorConfig
+
+#define schema_ConfigurationResponse_FIELDLIST(X, a) \
+X(a, STATIC,   SINGULAR, UENUM,    error,             1) \
+X(a, CALLBACK, SINGULAR, STRING,   msg,               2) \
+X(a, STATIC,   SINGULAR, UINT64,   version,           3) \
+X(a, STATIC,   SINGULAR, UINT32,   kernel_version,    4)
+#define schema_ConfigurationResponse_CALLBACK pb_default_field_callback
+#define schema_ConfigurationResponse_DEFAULT NULL
+
+extern const pb_msgdesc_t schema_ContainerCollectorConfig_msg;
+extern const pb_msgdesc_t schema_ExecuteCollectorConfig_msg;
+extern const pb_msgdesc_t schema_MemExecCollectorConfig_msg;
+extern const pb_msgdesc_t schema_ConfigurationRequest_msg;
+extern const pb_msgdesc_t schema_ConfigurationResponse_msg;
+
+/* Defines for backwards compatibility with code written before nanopb-0.4.0 */
+#define schema_ContainerCollectorConfig_fields &schema_ContainerCollectorConfig_msg
+#define schema_ExecuteCollectorConfig_fields &schema_ExecuteCollectorConfig_msg
+#define schema_MemExecCollectorConfig_fields &schema_MemExecCollectorConfig_msg
+#define schema_ConfigurationRequest_fields &schema_ConfigurationRequest_msg
+#define schema_ConfigurationResponse_fields &schema_ConfigurationResponse_msg
+
+/* Maximum encoded size of messages (where known) */
+/* schema_ExecuteCollectorConfig_size depends on runtime parameters */
+/* schema_ConfigurationRequest_size depends on runtime parameters */
+/* schema_ConfigurationResponse_size depends on runtime parameters */
+#define schema_ContainerCollectorConfig_size     2
+#define schema_MemExecCollectorConfig_size       2
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif

diff --git a/security/container/protos/config.proto b/security/container/protos/config.proto
new file mode 100644
index 0000000..e32a517
--- /dev/null
+++ b/security/container/protos/config.proto

@@ -0,0 +1,51 @@
+syntax = "proto3";
+
+package schema;
+
+// Collect information about running containers
+message ContainerCollectorConfig {
+  bool enabled = 1;
+}
+
+// Configuration of the execute collector.
+// NOTE(review): presumably controls reporting of process executions, given
+// the argv/envp limits below - confirm against the LSM implementation.
+message ExecuteCollectorConfig {
+  bool enabled = 1;
+
+  // truncate argv/envp if cumulative length exceeds limit
+  uint32 argv_limit = 2;
+  uint32 envp_limit = 3;
+
+  // If specified, only report the named environment variables.  An
+  // empty envp_allowlist indicates that all environment variables
+  // should be reported up to a cumulative total of envp_limit bytes.
+  repeated string envp_allowlist = 4;
+}
+
+// Collect information about executable memory mappings.
+message MemExecCollectorConfig {
+  bool enabled = 1;
+}
+
+// Convey configuration information to Guest LSM
+message ConfigurationRequest {
+  ContainerCollectorConfig container_config = 1;
+  ExecuteCollectorConfig execute_config = 2;
+  MemExecCollectorConfig memexec_config = 3;
+
+  // Additional configuration messages will be added as new collectors
+  // are implemented
+}
+
+// Report success or failure of previous ConfigurationRequest
+message ConfigurationResponse {
+  enum ErrorCode {
+    // Keep values in sync with
+    // https://github.com/googleapis/googleapis/blob/master/google/rpc/code.proto
+    NO_ERROR = 0;
+    UNKNOWN = 2;
+  }
+
+  ErrorCode error = 1;
+  string msg = 2;
+  uint64 version = 3;         // Version of the LSM
+  uint32 kernel_version = 4;  // LINUX_VERSION_CODE
+}

diff --git a/security/container/protos/event.pb.c b/security/container/protos/event.pb.c
new file mode 100644
index 0000000..2293566
--- /dev/null
+++ b/security/container/protos/event.pb.c

@@ -0,0 +1,61 @@
+/* Automatically generated nanopb constant definitions */
+/* Generated by nanopb-0.4.5 */
+
+#include "event.pb.h"
+#if PB_PROTO_HEADER_VERSION != 40
+#error Regenerate this file with the current version of nanopb generator.
+#endif
+
+PB_BIND(schema_SocketIp, schema_SocketIp, AUTO)
+
+
+PB_BIND(schema_Socket, schema_Socket, AUTO)
+
+
+PB_BIND(schema_Overlay, schema_Overlay, AUTO)
+
+
+PB_BIND(schema_File, schema_File, AUTO)
+
+
+PB_BIND(schema_ProcessArguments, schema_ProcessArguments, AUTO)
+
+
+PB_BIND(schema_Descriptor, schema_Descriptor, AUTO)
+
+
+PB_BIND(schema_Streams, schema_Streams, 2)
+
+
+PB_BIND(schema_Process, schema_Process, 2)
+
+
+PB_BIND(schema_Container, schema_Container, AUTO)
+
+
+PB_BIND(schema_ExecuteEvent, schema_ExecuteEvent, 2)
+
+
+PB_BIND(schema_CloneEvent, schema_CloneEvent, 2)
+
+
+PB_BIND(schema_EnumerateProcessEvent, schema_EnumerateProcessEvent, 2)
+
+
+PB_BIND(schema_MemoryExecEvent, schema_MemoryExecEvent, 2)
+
+
+PB_BIND(schema_ContainerInfoEvent, schema_ContainerInfoEvent, AUTO)
+
+
+PB_BIND(schema_ExitEvent, schema_ExitEvent, AUTO)
+
+
+PB_BIND(schema_Event, schema_Event, 2)
+
+
+PB_BIND(schema_ContainerReport, schema_ContainerReport, AUTO)
+
+
+
+

diff --git a/security/container/protos/event.pb.h b/security/container/protos/event.pb.h
new file mode 100644
index 0000000..9535068
--- /dev/null
+++ b/security/container/protos/event.pb.h

@@ -0,0 +1,518 @@
+/* Automatically generated nanopb header */
+/* Generated by nanopb-0.4.5 */
+
+#ifndef PB_SCHEMA_EVENT_PB_H_INCLUDED
+#define PB_SCHEMA_EVENT_PB_H_INCLUDED
+#include <pb.h>
+
+#if PB_PROTO_HEADER_VERSION != 40
+#error Regenerate this file with the current version of nanopb generator.
+#endif
+
+/* Enum definitions */
+typedef enum _schema_MemoryExecEvent_Action { 
+    schema_MemoryExecEvent_Action_UNDEFINED = 0, 
+    schema_MemoryExecEvent_Action_MPROTECT = 1, 
+    schema_MemoryExecEvent_Action_MMAP_FILE = 2 
+} schema_MemoryExecEvent_Action;
+
+/* Struct definitions */
+/* The process with the indicated pid has exited. */
+typedef struct _schema_ExitEvent { 
+    pb_callback_t process_uuid; 
+} schema_ExitEvent;
+
+typedef struct _schema_Container { 
+    uint64_t creation_timestamp; /* container create time in ns */
+    pb_callback_t pod_namespace; 
+    pb_callback_t pod_name; 
+    uint64_t container_id; /* unique across lifetime of Node */
+    pb_callback_t container_name; 
+    pb_callback_t container_image_uri; 
+    pb_callback_t labels; 
+    pb_callback_t init_uuid; 
+    pb_callback_t container_image_id; 
+} schema_Container;
+
+typedef struct _schema_Overlay { 
+    bool lower_layer; 
+    bool upper_layer; 
+    pb_callback_t modified_uuid; /* The process who first modified the file. */
+} schema_Overlay;
+
+typedef struct _schema_ProcessArguments { 
+    pb_callback_t argv; /* process arguments */
+    uint32_t argv_truncated; /* number of characters truncated from argv */
+    pb_callback_t envp; /* process environment variables */
+    uint32_t envp_truncated; /* number of characters truncated from envp */
+} schema_ProcessArguments;
+
+typedef struct _schema_SocketIp { 
+    uint32_t family; /* AF_* for socket type. */
+    pb_callback_t ip; /* ip4 or ip6 address. */
+    uint32_t port; /* port bind or connected. */
+} schema_SocketIp;
+
+/* Associate the following container information with all processes
+ that have the indicated container_id. */
+typedef struct _schema_ContainerInfoEvent { 
+    bool has_container;
+    schema_Container container; 
+} schema_ContainerInfoEvent;
+
+/* Message sent by the daemonset to the LSM for container enlightenment. */
+typedef struct _schema_ContainerReport { 
+    uint32_t pid; /* Top pid of the running container. */
+    bool has_container;
+    schema_Container container; /* Information collected about the container. */
+} schema_ContainerReport;
+
+typedef struct _schema_Socket { 
+    bool has_local;
+    schema_SocketIp local; 
+    bool has_remote;
+    schema_SocketIp remote; /* unset if not connected. */
+} schema_Socket;
+
+typedef struct _schema_File { 
+    pb_callback_t fullpath; 
+    pb_size_t which_filesystem;
+    union {
+        schema_Overlay overlayfs;
+        schema_Socket socket;
+    } filesystem; /* inode number. */
+    uint32_t ino; 
+    uint64_t ctime; 
+} schema_File;
+
+typedef struct _schema_Descriptor { 
+    uint32_t mode; /* file mode (stat st_mode) */
+    bool has_file;
+    schema_File file; 
+} schema_Descriptor;
+
+typedef struct _schema_Streams { 
+    bool has_stdin;
+    schema_Descriptor stdin; 
+    bool has_stdout;
+    schema_Descriptor stdout; 
+    bool has_stderr;
+    schema_Descriptor stderr; 
+} schema_Streams;
+
+typedef struct _schema_Process { 
+    uint64_t creation_timestamp; /* Only populated in ExecuteEvent, in ns. */
+    pb_callback_t uuid; 
+    uint32_t pid; 
+    bool has_binary;
+    schema_File binary; /* Only populated in ExecuteEvent. */
+    uint32_t parent_pid; 
+    pb_callback_t parent_uuid; 
+    uint64_t container_id; /* unique id of process's container */
+    uint32_t container_pid; /* pid inside the container namespace pid */
+    uint32_t container_parent_pid; /* optional */
+    bool has_args;
+    schema_ProcessArguments args; /* Only populated in ExecuteEvent. */
+    bool has_streams;
+    schema_Streams streams; /* Only populated in ExecuteEvent. */
+    uint64_t exec_session_id; /* identifier set for kubectl exec sessions. */
+} schema_Process;
+
+/* A process clone is being created. This message means that a cloning operation
+ is being attempted. It may be sent even if fork fails. */
+typedef struct _schema_CloneEvent { 
+    bool has_proc;
+    schema_Process proc; 
+} schema_CloneEvent;
+
+/* Processes that are enumerated at startup will be sent with this event. There
+ is no distinction from events we would have seen from fork or exec. */
+typedef struct _schema_EnumerateProcessEvent { 
+    bool has_proc;
+    schema_Process proc; 
+} schema_EnumerateProcessEvent;
+
+/* A binary being executed.
+ e.g., execve() */
+typedef struct _schema_ExecuteEvent { 
+    bool has_proc;
+    schema_Process proc; 
+} schema_ExecuteEvent;
+
+/* Collect information about mmap/mprotect calls with the PROT_EXEC flag set. */
+typedef struct _schema_MemoryExecEvent { 
+    bool has_proc;
+    schema_Process proc; /* The origin process */
+    /* The timestamp in ns when the memory was set executable */
+    uint64_t prot_exec_timestamp; 
+    /* The prot flags granted by the kernel for the operation */
+    uint64_t new_flags; 
+    /* The prot flags requested for the mprotect/mmap operation */
+    uint64_t req_flags; 
+    /* The vm_flags prior to the mprotect operation, if relevant */
+    uint64_t old_vm_flags; 
+    /* The operational flags for the mmap operation, if relevant */
+    uint64_t mmap_flags; 
+    /* Derived from the file struct describing the fd being mapped */
+    bool has_mapped_file;
+    schema_File mapped_file; 
+    schema_MemoryExecEvent_Action action; 
+    uint64_t start_addr; /* The executable memory region start addr */
+    uint64_t end_addr; /* The executable memory region end addr */
+    /* True if this event is a mmap of the process' binary */
+    bool is_initial_mmap; 
+} schema_MemoryExecEvent;
+
+/* Next ID: 8 */
+typedef struct _schema_Event { 
+    pb_size_t which_event;
+    union {
+        schema_ExecuteEvent execute;
+        schema_ContainerInfoEvent container;
+        schema_ExitEvent exit;
+        schema_MemoryExecEvent memexec;
+        schema_CloneEvent clone;
+        schema_EnumerateProcessEvent enumproc;
+    } event; 
+    uint64_t timestamp; 
+} schema_Event;
+
+
+/* Helper constants for enums */
+#define _schema_MemoryExecEvent_Action_MIN schema_MemoryExecEvent_Action_UNDEFINED
+#define _schema_MemoryExecEvent_Action_MAX schema_MemoryExecEvent_Action_MMAP_FILE
+#define _schema_MemoryExecEvent_Action_ARRAYSIZE ((schema_MemoryExecEvent_Action)(schema_MemoryExecEvent_Action_MMAP_FILE+1))
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Initializer values for message structs */
+#define schema_SocketIp_init_default             {0, {{NULL}, NULL}, 0}
+#define schema_Socket_init_default               {false, schema_SocketIp_init_default, false, schema_SocketIp_init_default}
+#define schema_Overlay_init_default              {0, 0, {{NULL}, NULL}}
+#define schema_File_init_default                 {{{NULL}, NULL}, 0, {schema_Overlay_init_default}, 0, 0}
+#define schema_ProcessArguments_init_default     {{{NULL}, NULL}, 0, {{NULL}, NULL}, 0}
+#define schema_Descriptor_init_default           {0, false, schema_File_init_default}
+#define schema_Streams_init_default              {false, schema_Descriptor_init_default, false, schema_Descriptor_init_default, false, schema_Descriptor_init_default}
+#define schema_Process_init_default              {0, {{NULL}, NULL}, 0, false, schema_File_init_default, 0, {{NULL}, NULL}, 0, 0, 0, false, schema_ProcessArguments_init_default, false, schema_Streams_init_default, 0}
+#define schema_Container_init_default            {0, {{NULL}, NULL}, {{NULL}, NULL}, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
+#define schema_ExecuteEvent_init_default         {false, schema_Process_init_default}
+#define schema_CloneEvent_init_default           {false, schema_Process_init_default}
+#define schema_EnumerateProcessEvent_init_default {false, schema_Process_init_default}
+#define schema_MemoryExecEvent_init_default      {false, schema_Process_init_default, 0, 0, 0, 0, 0, false, schema_File_init_default, _schema_MemoryExecEvent_Action_MIN, 0, 0, 0}
+#define schema_ContainerInfoEvent_init_default   {false, schema_Container_init_default}
+#define schema_ExitEvent_init_default            {{{NULL}, NULL}}
+#define schema_Event_init_default                {0, {schema_ExecuteEvent_init_default}, 0}
+#define schema_ContainerReport_init_default      {0, false, schema_Container_init_default}
+#define schema_SocketIp_init_zero                {0, {{NULL}, NULL}, 0}
+#define schema_Socket_init_zero                  {false, schema_SocketIp_init_zero, false, schema_SocketIp_init_zero}
+#define schema_Overlay_init_zero                 {0, 0, {{NULL}, NULL}}
+#define schema_File_init_zero                    {{{NULL}, NULL}, 0, {schema_Overlay_init_zero}, 0, 0}
+#define schema_ProcessArguments_init_zero        {{{NULL}, NULL}, 0, {{NULL}, NULL}, 0}
+#define schema_Descriptor_init_zero              {0, false, schema_File_init_zero}
+#define schema_Streams_init_zero                 {false, schema_Descriptor_init_zero, false, schema_Descriptor_init_zero, false, schema_Descriptor_init_zero}
+#define schema_Process_init_zero                 {0, {{NULL}, NULL}, 0, false, schema_File_init_zero, 0, {{NULL}, NULL}, 0, 0, 0, false, schema_ProcessArguments_init_zero, false, schema_Streams_init_zero, 0}
+#define schema_Container_init_zero               {0, {{NULL}, NULL}, {{NULL}, NULL}, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
+#define schema_ExecuteEvent_init_zero            {false, schema_Process_init_zero}
+#define schema_CloneEvent_init_zero              {false, schema_Process_init_zero}
+#define schema_EnumerateProcessEvent_init_zero   {false, schema_Process_init_zero}
+#define schema_MemoryExecEvent_init_zero         {false, schema_Process_init_zero, 0, 0, 0, 0, 0, false, schema_File_init_zero, _schema_MemoryExecEvent_Action_MIN, 0, 0, 0}
+#define schema_ContainerInfoEvent_init_zero      {false, schema_Container_init_zero}
+#define schema_ExitEvent_init_zero               {{{NULL}, NULL}}
+#define schema_Event_init_zero                   {0, {schema_ExecuteEvent_init_zero}, 0}
+#define schema_ContainerReport_init_zero         {0, false, schema_Container_init_zero}
+
+/* Field tags (for use in manual encoding/decoding) */
+#define schema_ExitEvent_process_uuid_tag        1
+#define schema_Container_creation_timestamp_tag  1
+#define schema_Container_pod_namespace_tag       2
+#define schema_Container_pod_name_tag            3
+#define schema_Container_container_id_tag        4
+#define schema_Container_container_name_tag      5
+#define schema_Container_container_image_uri_tag 6
+#define schema_Container_labels_tag              7
+#define schema_Container_init_uuid_tag           8
+#define schema_Container_container_image_id_tag  9
+#define schema_Overlay_lower_layer_tag           1
+#define schema_Overlay_upper_layer_tag           2
+#define schema_Overlay_modified_uuid_tag         3
+#define schema_ProcessArguments_argv_tag         1
+#define schema_ProcessArguments_argv_truncated_tag 2
+#define schema_ProcessArguments_envp_tag         3
+#define schema_ProcessArguments_envp_truncated_tag 4
+#define schema_SocketIp_family_tag               1
+#define schema_SocketIp_ip_tag                   2
+#define schema_SocketIp_port_tag                 3
+#define schema_ContainerInfoEvent_container_tag  1
+#define schema_ContainerReport_pid_tag           1
+#define schema_ContainerReport_container_tag     2
+#define schema_Socket_local_tag                  1
+#define schema_Socket_remote_tag                 2
+#define schema_File_fullpath_tag                 1
+#define schema_File_overlayfs_tag                2
+#define schema_File_socket_tag                   4
+#define schema_File_ino_tag                      3
+#define schema_File_ctime_tag                    5
+#define schema_Descriptor_mode_tag               1
+#define schema_Descriptor_file_tag               2
+#define schema_Streams_stdin_tag                 1
+#define schema_Streams_stdout_tag                2
+#define schema_Streams_stderr_tag                3
+#define schema_Process_creation_timestamp_tag    1
+#define schema_Process_uuid_tag                  2
+#define schema_Process_pid_tag                   3
+#define schema_Process_binary_tag                4
+#define schema_Process_parent_pid_tag            5
+#define schema_Process_parent_uuid_tag           6
+#define schema_Process_container_id_tag          7
+#define schema_Process_container_pid_tag         8
+#define schema_Process_container_parent_pid_tag  9
+#define schema_Process_args_tag                  10
+#define schema_Process_streams_tag               11
+#define schema_Process_exec_session_id_tag       12
+#define schema_CloneEvent_proc_tag               1
+#define schema_EnumerateProcessEvent_proc_tag    1
+#define schema_ExecuteEvent_proc_tag             1
+#define schema_MemoryExecEvent_proc_tag          1
+#define schema_MemoryExecEvent_prot_exec_timestamp_tag 2
+#define schema_MemoryExecEvent_new_flags_tag     3
+#define schema_MemoryExecEvent_req_flags_tag     4
+#define schema_MemoryExecEvent_old_vm_flags_tag  5
+#define schema_MemoryExecEvent_mmap_flags_tag    6
+#define schema_MemoryExecEvent_mapped_file_tag   7
+#define schema_MemoryExecEvent_action_tag        8
+#define schema_MemoryExecEvent_start_addr_tag    9
+#define schema_MemoryExecEvent_end_addr_tag      10
+#define schema_MemoryExecEvent_is_initial_mmap_tag 11
+#define schema_Event_execute_tag                 1
+#define schema_Event_container_tag               2
+#define schema_Event_exit_tag                    3
+#define schema_Event_memexec_tag                 4
+#define schema_Event_clone_tag                   5
+#define schema_Event_enumproc_tag                7
+#define schema_Event_timestamp_tag               6
+
+/* Struct field encoding specification for nanopb */
+#define schema_SocketIp_FIELDLIST(X, a) \
+X(a, STATIC,   SINGULAR, UINT32,   family,            1) \
+X(a, CALLBACK, SINGULAR, BYTES,    ip,                2) \
+X(a, STATIC,   SINGULAR, UINT32,   port,              3)
+#define schema_SocketIp_CALLBACK pb_default_field_callback
+#define schema_SocketIp_DEFAULT NULL
+
+#define schema_Socket_FIELDLIST(X, a) \
+X(a, STATIC,   OPTIONAL, MESSAGE,  local,             1) \
+X(a, STATIC,   OPTIONAL, MESSAGE,  remote,            2)
+#define schema_Socket_CALLBACK NULL
+#define schema_Socket_DEFAULT NULL
+#define schema_Socket_local_MSGTYPE schema_SocketIp
+#define schema_Socket_remote_MSGTYPE schema_SocketIp
+
+#define schema_Overlay_FIELDLIST(X, a) \
+X(a, STATIC,   SINGULAR, BOOL,     lower_layer,       1) \
+X(a, STATIC,   SINGULAR, BOOL,     upper_layer,       2) \
+X(a, CALLBACK, SINGULAR, BYTES,    modified_uuid,     3)
+#define schema_Overlay_CALLBACK pb_default_field_callback
+#define schema_Overlay_DEFAULT NULL
+
+#define schema_File_FIELDLIST(X, a) \
+X(a, CALLBACK, SINGULAR, BYTES,    fullpath,          1) \
+X(a, STATIC,   ONEOF,    MESSAGE,  (filesystem,overlayfs,filesystem.overlayfs),   2) \
+X(a, STATIC,   SINGULAR, UINT32,   ino,               3) \
+X(a, STATIC,   ONEOF,    MESSAGE,  (filesystem,socket,filesystem.socket),   4) \
+X(a, STATIC,   SINGULAR, UINT64,   ctime,             5)
+#define schema_File_CALLBACK pb_default_field_callback
+#define schema_File_DEFAULT NULL
+#define schema_File_filesystem_overlayfs_MSGTYPE schema_Overlay
+#define schema_File_filesystem_socket_MSGTYPE schema_Socket
+
+#define schema_ProcessArguments_FIELDLIST(X, a) \
+X(a, CALLBACK, REPEATED, BYTES,    argv,              1) \
+X(a, STATIC,   SINGULAR, UINT32,   argv_truncated,    2) \
+X(a, CALLBACK, REPEATED, BYTES,    envp,              3) \
+X(a, STATIC,   SINGULAR, UINT32,   envp_truncated,    4)
+#define schema_ProcessArguments_CALLBACK pb_default_field_callback
+#define schema_ProcessArguments_DEFAULT NULL
+
+#define schema_Descriptor_FIELDLIST(X, a) \
+X(a, STATIC,   SINGULAR, UINT32,   mode,              1) \
+X(a, STATIC,   OPTIONAL, MESSAGE,  file,              2)
+#define schema_Descriptor_CALLBACK NULL
+#define schema_Descriptor_DEFAULT NULL
+#define schema_Descriptor_file_MSGTYPE schema_File
+
+#define schema_Streams_FIELDLIST(X, a) \
+X(a, STATIC,   OPTIONAL, MESSAGE,  stdin,             1) \
+X(a, STATIC,   OPTIONAL, MESSAGE,  stdout,            2) \
+X(a, STATIC,   OPTIONAL, MESSAGE,  stderr,            3)
+#define schema_Streams_CALLBACK NULL
+#define schema_Streams_DEFAULT NULL
+#define schema_Streams_stdin_MSGTYPE schema_Descriptor
+#define schema_Streams_stdout_MSGTYPE schema_Descriptor
+#define schema_Streams_stderr_MSGTYPE schema_Descriptor
+
+#define schema_Process_FIELDLIST(X, a) \
+X(a, STATIC,   SINGULAR, UINT64,   creation_timestamp,   1) \
+X(a, CALLBACK, SINGULAR, BYTES,    uuid,              2) \
+X(a, STATIC,   SINGULAR, UINT32,   pid,               3) \
+X(a, STATIC,   OPTIONAL, MESSAGE,  binary,            4) \
+X(a, STATIC,   SINGULAR, UINT32,   parent_pid,        5) \
+X(a, CALLBACK, SINGULAR, BYTES,    parent_uuid,       6) \
+X(a, STATIC,   SINGULAR, UINT64,   container_id,      7) \
+X(a, STATIC,   SINGULAR, UINT32,   container_pid,     8) \
+X(a, STATIC,   SINGULAR, UINT32,   container_parent_pid,   9) \
+X(a, STATIC,   OPTIONAL, MESSAGE,  args,             10) \
+X(a, STATIC,   OPTIONAL, MESSAGE,  streams,          11) \
+X(a, STATIC,   SINGULAR, UINT64,   exec_session_id,  12)
+#define schema_Process_CALLBACK pb_default_field_callback
+#define schema_Process_DEFAULT NULL
+#define schema_Process_binary_MSGTYPE schema_File
+#define schema_Process_args_MSGTYPE schema_ProcessArguments
+#define schema_Process_streams_MSGTYPE schema_Streams
+
+#define schema_Container_FIELDLIST(X, a) \
+X(a, STATIC,   SINGULAR, UINT64,   creation_timestamp,   1) \
+X(a, CALLBACK, SINGULAR, BYTES,    pod_namespace,     2) \
+X(a, CALLBACK, SINGULAR, BYTES,    pod_name,          3) \
+X(a, STATIC,   SINGULAR, UINT64,   container_id,      4) \
+X(a, CALLBACK, SINGULAR, BYTES,    container_name,    5) \
+X(a, CALLBACK, SINGULAR, BYTES,    container_image_uri,   6) \
+X(a, CALLBACK, REPEATED, BYTES,    labels,            7) \
+X(a, CALLBACK, SINGULAR, BYTES,    init_uuid,         8) \
+X(a, CALLBACK, SINGULAR, BYTES,    container_image_id,   9)
+#define schema_Container_CALLBACK pb_default_field_callback
+#define schema_Container_DEFAULT NULL
+
+#define schema_ExecuteEvent_FIELDLIST(X, a) \
+X(a, STATIC,   OPTIONAL, MESSAGE,  proc,              1)
+#define schema_ExecuteEvent_CALLBACK NULL
+#define schema_ExecuteEvent_DEFAULT NULL
+#define schema_ExecuteEvent_proc_MSGTYPE schema_Process
+
+#define schema_CloneEvent_FIELDLIST(X, a) \
+X(a, STATIC,   OPTIONAL, MESSAGE,  proc,              1)
+#define schema_CloneEvent_CALLBACK NULL
+#define schema_CloneEvent_DEFAULT NULL
+#define schema_CloneEvent_proc_MSGTYPE schema_Process
+
+#define schema_EnumerateProcessEvent_FIELDLIST(X, a) \
+X(a, STATIC,   OPTIONAL, MESSAGE,  proc,              1)
+#define schema_EnumerateProcessEvent_CALLBACK NULL
+#define schema_EnumerateProcessEvent_DEFAULT NULL
+#define schema_EnumerateProcessEvent_proc_MSGTYPE schema_Process
+
+#define schema_MemoryExecEvent_FIELDLIST(X, a) \
+X(a, STATIC,   OPTIONAL, MESSAGE,  proc,              1) \
+X(a, STATIC,   SINGULAR, UINT64,   prot_exec_timestamp,   2) \
+X(a, STATIC,   SINGULAR, UINT64,   new_flags,         3) \
+X(a, STATIC,   SINGULAR, UINT64,   req_flags,         4) \
+X(a, STATIC,   SINGULAR, UINT64,   old_vm_flags,      5) \
+X(a, STATIC,   SINGULAR, UINT64,   mmap_flags,        6) \
+X(a, STATIC,   OPTIONAL, MESSAGE,  mapped_file,       7) \
+X(a, STATIC,   SINGULAR, UENUM,    action,            8) \
+X(a, STATIC,   SINGULAR, UINT64,   start_addr,        9) \
+X(a, STATIC,   SINGULAR, UINT64,   end_addr,         10) \
+X(a, STATIC,   SINGULAR, BOOL,     is_initial_mmap,  11)
+#define schema_MemoryExecEvent_CALLBACK NULL
+#define schema_MemoryExecEvent_DEFAULT NULL
+#define schema_MemoryExecEvent_proc_MSGTYPE schema_Process
+#define schema_MemoryExecEvent_mapped_file_MSGTYPE schema_File
+
+#define schema_ContainerInfoEvent_FIELDLIST(X, a) \
+X(a, STATIC,   OPTIONAL, MESSAGE,  container,         1)
+#define schema_ContainerInfoEvent_CALLBACK NULL
+#define schema_ContainerInfoEvent_DEFAULT NULL
+#define schema_ContainerInfoEvent_container_MSGTYPE schema_Container
+
+#define schema_ExitEvent_FIELDLIST(X, a) \
+X(a, CALLBACK, SINGULAR, BYTES,    process_uuid,      1)
+#define schema_ExitEvent_CALLBACK pb_default_field_callback
+#define schema_ExitEvent_DEFAULT NULL
+
+#define schema_Event_FIELDLIST(X, a) \
+X(a, STATIC,   ONEOF,    MESSAGE,  (event,execute,event.execute),   1) \
+X(a, STATIC,   ONEOF,    MESSAGE,  (event,container,event.container),   2) \
+X(a, STATIC,   ONEOF,    MESSAGE,  (event,exit,event.exit),   3) \
+X(a, STATIC,   ONEOF,    MESSAGE,  (event,memexec,event.memexec),   4) \
+X(a, STATIC,   ONEOF,    MESSAGE,  (event,clone,event.clone),   5) \
+X(a, STATIC,   SINGULAR, UINT64,   timestamp,         6) \
+X(a, STATIC,   ONEOF,    MESSAGE,  (event,enumproc,event.enumproc),   7)
+#define schema_Event_CALLBACK NULL
+#define schema_Event_DEFAULT NULL
+#define schema_Event_event_execute_MSGTYPE schema_ExecuteEvent
+#define schema_Event_event_container_MSGTYPE schema_ContainerInfoEvent
+#define schema_Event_event_exit_MSGTYPE schema_ExitEvent
+#define schema_Event_event_memexec_MSGTYPE schema_MemoryExecEvent
+#define schema_Event_event_clone_MSGTYPE schema_CloneEvent
+#define schema_Event_event_enumproc_MSGTYPE schema_EnumerateProcessEvent
+
+#define schema_ContainerReport_FIELDLIST(X, a) \
+X(a, STATIC,   SINGULAR, UINT32,   pid,               1) \
+X(a, STATIC,   OPTIONAL, MESSAGE,  container,         2)
+#define schema_ContainerReport_CALLBACK NULL
+#define schema_ContainerReport_DEFAULT NULL
+#define schema_ContainerReport_container_MSGTYPE schema_Container
+
+extern const pb_msgdesc_t schema_SocketIp_msg;
+extern const pb_msgdesc_t schema_Socket_msg;
+extern const pb_msgdesc_t schema_Overlay_msg;
+extern const pb_msgdesc_t schema_File_msg;
+extern const pb_msgdesc_t schema_ProcessArguments_msg;
+extern const pb_msgdesc_t schema_Descriptor_msg;
+extern const pb_msgdesc_t schema_Streams_msg;
+extern const pb_msgdesc_t schema_Process_msg;
+extern const pb_msgdesc_t schema_Container_msg;
+extern const pb_msgdesc_t schema_ExecuteEvent_msg;
+extern const pb_msgdesc_t schema_CloneEvent_msg;
+extern const pb_msgdesc_t schema_EnumerateProcessEvent_msg;
+extern const pb_msgdesc_t schema_MemoryExecEvent_msg;
+extern const pb_msgdesc_t schema_ContainerInfoEvent_msg;
+extern const pb_msgdesc_t schema_ExitEvent_msg;
+extern const pb_msgdesc_t schema_Event_msg;
+extern const pb_msgdesc_t schema_ContainerReport_msg;
+
+/* Defines for backwards compatibility with code written before nanopb-0.4.0 */
+#define schema_SocketIp_fields &schema_SocketIp_msg
+#define schema_Socket_fields &schema_Socket_msg
+#define schema_Overlay_fields &schema_Overlay_msg
+#define schema_File_fields &schema_File_msg
+#define schema_ProcessArguments_fields &schema_ProcessArguments_msg
+#define schema_Descriptor_fields &schema_Descriptor_msg
+#define schema_Streams_fields &schema_Streams_msg
+#define schema_Process_fields &schema_Process_msg
+#define schema_Container_fields &schema_Container_msg
+#define schema_ExecuteEvent_fields &schema_ExecuteEvent_msg
+#define schema_CloneEvent_fields &schema_CloneEvent_msg
+#define schema_EnumerateProcessEvent_fields &schema_EnumerateProcessEvent_msg
+#define schema_MemoryExecEvent_fields &schema_MemoryExecEvent_msg
+#define schema_ContainerInfoEvent_fields &schema_ContainerInfoEvent_msg
+#define schema_ExitEvent_fields &schema_ExitEvent_msg
+#define schema_Event_fields &schema_Event_msg
+#define schema_ContainerReport_fields &schema_ContainerReport_msg
+
+/* Maximum encoded size of messages (where known) */
+/* schema_SocketIp_size depends on runtime parameters */
+/* schema_Socket_size depends on runtime parameters */
+/* schema_Overlay_size depends on runtime parameters */
+/* schema_File_size depends on runtime parameters */
+/* schema_ProcessArguments_size depends on runtime parameters */
+/* schema_Descriptor_size depends on runtime parameters */
+/* schema_Streams_size depends on runtime parameters */
+/* schema_Process_size depends on runtime parameters */
+/* schema_Container_size depends on runtime parameters */
+/* schema_ExecuteEvent_size depends on runtime parameters */
+/* schema_CloneEvent_size depends on runtime parameters */
+/* schema_EnumerateProcessEvent_size depends on runtime parameters */
+/* schema_MemoryExecEvent_size depends on runtime parameters */
+/* schema_ContainerInfoEvent_size depends on runtime parameters */
+/* schema_ExitEvent_size depends on runtime parameters */
+/* schema_Event_size depends on runtime parameters */
+/* schema_ContainerReport_size depends on runtime parameters */
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif

diff --git a/security/container/protos/event.proto b/security/container/protos/event.proto
new file mode 100644
index 0000000..dfe483f
--- /dev/null
+++ b/security/container/protos/event.proto

@@ -0,0 +1,151 @@
+syntax = "proto3";
+
+package schema;
+
+message SocketIp {
+  uint32 family = 1;  // AF_* for socket type.
+  bytes ip = 2;       // ip4 or ip6 address.
+  uint32 port = 3;    // port bind or connected.
+}
+
+message Socket {
+  SocketIp local = 1;
+  SocketIp remote = 2;  // unset if not connected.
+}
+
+message Overlay {
+  bool lower_layer = 1;
+  bool upper_layer = 2;
+  bytes modified_uuid = 3;  // The process that first modified the file.
+}
+
+message File {
+  bytes fullpath = 1;
+  uint32 ino = 3;  // inode number.
+  oneof filesystem {
+    Overlay overlayfs = 2;
+    Socket socket = 4;
+  }
+  uint64 ctime = 5;  // Declared in the generated event.pb.h; NOTE(review): confirm units.
+}
+
+message ProcessArguments {
+  repeated bytes argv = 1;    // process arguments
+  uint32 argv_truncated = 2;  // number of characters truncated from argv
+  repeated bytes envp = 3;    // process environment variables
+  uint32 envp_truncated = 4;  // number of characters truncated from envp
+}
+
+message Descriptor {
+  uint32 mode = 1;  // file mode (stat st_mode)
+  File file = 2;
+}
+
+message Streams {
+  Descriptor stdin = 1;
+  Descriptor stdout = 2;
+  Descriptor stderr = 3;
+}
+
+message Process {
+  uint64 creation_timestamp = 1;  // Only populated in ExecuteEvent, in ns.
+  bytes uuid = 2;
+  uint32 pid = 3;
+  File binary = 4;  // Only populated in ExecuteEvent.
+  uint32 parent_pid = 5;
+  bytes parent_uuid = 6;
+  uint64 container_id = 7;          // unique id of process's container
+  uint32 container_pid = 8;         // pid inside the container's pid namespace
+  uint32 container_parent_pid = 9;  // optional
+  ProcessArguments args = 10;       // Only populated in ExecuteEvent.
+  Streams streams = 11;             // Only populated in ExecuteEvent.
+  uint64 exec_session_id = 12;      // identifier set for kubectl exec sessions.
+}
+
+message Container {
+  uint64 creation_timestamp = 1;  // container create time in ns
+  bytes pod_namespace = 2;
+  bytes pod_name = 3;
+  uint64 container_id = 4;  // unique across lifetime of Node
+  bytes container_name = 5;
+  bytes container_image_uri = 6;
+  repeated bytes labels = 7;
+  bytes init_uuid = 8;
+  bytes container_image_id = 9;
+}
+
+// A binary being executed.
+// e.g., execve()
+message ExecuteEvent {
+  Process proc = 1;
+}
+
+// A process clone is being created. This message means that a cloning operation
+// is being attempted. It may be sent even if fork fails.
+message CloneEvent {
+  Process proc = 1;
+}
+
+// Processes that are enumerated at startup will be sent with this event. There
+// is no distinction from events we would have seen from fork or exec.
+message EnumerateProcessEvent {
+  Process proc = 1;
+}
+
+// Collect information about mmap/mprotect calls with the PROT_EXEC flag set.
+message MemoryExecEvent {
+  Process proc = 1;  // The origin process
+  // The timestamp in ns when the memory was set executable
+  uint64 prot_exec_timestamp = 2;
+  // The prot flags granted by the kernel for the operation
+  uint64 new_flags = 3;
+  // The prot flags requested for the mprotect/mmap operation
+  uint64 req_flags = 4;
+  // The vm_flags prior to the mprotect operation, if relevant
+  uint64 old_vm_flags = 5;
+  // The operational flags for the mmap operation, if relevant
+  uint64 mmap_flags = 6;
+  // Derived from the file struct describing the fd being mapped
+  File mapped_file = 7;
+  enum Action {
+    UNDEFINED = 0;
+    MPROTECT = 1;
+    MMAP_FILE = 2;
+  }
+  Action action = 8;
+
+  uint64 start_addr = 9;  // The executable memory region start addr
+  uint64 end_addr = 10;   // The executable memory region end addr
+  // True if this event is a mmap of the process' binary
+  bool is_initial_mmap = 11;
+}
+
+// Associate the following container information with all processes
+// that have the indicated container_id.
+message ContainerInfoEvent {
+  Container container = 1;
+}
+
+// The process with the indicated uuid has exited.
+message ExitEvent {
+  bytes process_uuid = 1;
+}
+
+// Next ID: 8
+message Event {
+  oneof event {
+    ExecuteEvent execute = 1;
+    ContainerInfoEvent container = 2;
+    ExitEvent exit = 3;
+    MemoryExecEvent memexec = 4;
+    CloneEvent clone = 5;
+    EnumerateProcessEvent enumproc = 7;
+  }
+
+  uint64 timestamp = 6;  // In nanoseconds
+}
+
+// Message sent by the daemonset to the LSM for container enlightenment.
+message ContainerReport {
+  uint32 pid = 1;           // Top pid of the running container.
+  Container container = 2;  // Information collected about the container.
+}

diff --git a/security/container/protos/nanopb/LICENSE b/security/container/protos/nanopb/LICENSE
new file mode 100644
index 0000000..a83630a
--- /dev/null
+++ b/security/container/protos/nanopb/LICENSE

@@ -0,0 +1,20 @@
+Copyright (c) 2011 Petteri Aimonen <jpa at nanopb.mail.kapsi.fi>
+
+This software is provided 'as-is', without any express or
+implied warranty. In no event will the authors be held liable
+for any damages arising from the use of this software.
+
+Permission is granted to anyone to use this software for any
+purpose, including commercial applications, and to alter it and
+redistribute it freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you
+   must not claim that you wrote the original software. If you use
+   this software in a product, an acknowledgment in the product
+   documentation would be appreciated but is not required.
+
+2. Altered source versions must be plainly marked as such, and
+   must not be misrepresented as being the original software.
+
+3. This notice may not be removed or altered from any source
+   distribution.

diff --git a/security/container/protos/nanopb/METADATA b/security/container/protos/nanopb/METADATA
new file mode 100644
index 0000000..6b85630
--- /dev/null
+++ b/security/container/protos/nanopb/METADATA

@@ -0,0 +1,23 @@
+name: "nanopb"
+description: "Nanopb is a C library for encoding and decoding protocol buffers."
+
+third_party {
+  url {
+    type: GIT
+    value: "https://github.com/nanopb/nanopb/"
+  }
+  version: "0.4.5"
+  last_upgrade_date: {
+    year: 2021
+    month: 8
+    day: 12
+  }
+  license_type: NOTICE
+  security {
+    category: REVIEWED_AND_SECURE
+    note: "https://buganizer.corp.google.com/u/0/issues/19409596, https://buganizer.corp.google.com/u/0/issues/120506242"
+    tag: "NVD-CPE2.3:cpe:/a:nanopb_project:nanopb"
+    tag: "vuln_reporting:buganizer_component:588910"
+    tag: "vuln_reporting:contact_emails:"  # Blunderbuss will assign bugs.
+  }
+}

diff --git a/security/container/protos/nanopb/Makefile b/security/container/protos/nanopb/Makefile
new file mode 100644
index 0000000..b7e15f8
--- /dev/null
+++ b/security/container/protos/nanopb/Makefile

@@ -0,0 +1,7 @@
+obj-$(CONFIG_SECURITY_CONTAINER_MONITOR) += nanopb.o
+
+nanopb-y := pb_encode.o pb_decode.o pb_common.o
+
+ccflags-y := -I$(srctree)/security/container/protos \
+	-I$(srctree)/security/container/protos/nanopb \
+	$(PB_CCFLAGS)

diff --git a/security/container/protos/nanopb/pb.h b/security/container/protos/nanopb/pb.h
new file mode 100644
index 0000000..be7c067
--- /dev/null
+++ b/security/container/protos/nanopb/pb.h

@@ -0,0 +1,875 @@
+/* Common parts of the nanopb library. Most of these are quite low-level
+ * stuff. For the high-level interface, see pb_encode.h and pb_decode.h.
+ */
+
+#ifndef PB_H_INCLUDED
+#define PB_H_INCLUDED
+
+/*****************************************************************
+ * Nanopb compilation time options. You can change these here by *
+ * uncommenting the lines, or on the compiler command line.      *
+ *****************************************************************/
+
+/* Enable support for dynamically allocated fields */
+/* #define PB_ENABLE_MALLOC 1 */
+
+/* Define this if your CPU / compiler combination does not support
+ * unaligned memory access to packed structures. */
+/* #define PB_NO_PACKED_STRUCTS 1 */
+
+/* Increase the number of required fields that are tracked.
+ * A compiler warning will tell if you need this. */
+/* #define PB_MAX_REQUIRED_FIELDS 256 */
+
+/* Add support for tag numbers > 65535 and fields larger than 65535 bytes. */
+/* #define PB_FIELD_32BIT 1 */
+
+/* Disable support for error messages in order to save some code space. */
+/* #define PB_NO_ERRMSG 1 */
+
+/* Disable support for custom streams (support only memory buffers). */
+/* #define PB_BUFFER_ONLY 1 */
+
+/* Disable support for 64-bit datatypes, for compilers without int64_t
+   or to save some code space. */
+/* #define PB_WITHOUT_64BIT 1 */
+
+/* Don't encode scalar arrays as packed. This is only to be used when
+ * the decoder on the receiving side cannot process packed scalar arrays.
+ * Such example is older protobuf.js. */
+/* #define PB_ENCODE_ARRAYS_UNPACKED 1 */
+
+/* Enable conversion of doubles to floats for platforms that do not
+ * support 64-bit doubles. Most commonly AVR. */
+/* #define PB_CONVERT_DOUBLE_FLOAT 1 */
+
+/* Check whether incoming strings are valid UTF-8 sequences. Slows down
+ * the string processing slightly and slightly increases code size. */
+/* #define PB_VALIDATE_UTF8 1 */
+
+/******************************************************************
+ * You usually don't need to change anything below this line.     *
+ * Feel free to look around and use the defined macros, though.   *
+ ******************************************************************/
+
+
+/* Version of the nanopb library. Just in case you want to check it in
+ * your own program. */
+#define NANOPB_VERSION nanopb-0.4.5
+
+/* Include all the system headers needed by nanopb. You will need the
+ * definitions of the following:
+ * - strlen, memcpy, memset functions
+ * - [u]int_least8_t, uint_fast8_t, [u]int_least16_t, [u]int32_t, [u]int64_t
+ * - size_t
+ * - bool
+ *
+ * If you don't have the standard header files, you can instead provide
+ * a custom header that defines or includes all this. In that case,
+ * define PB_SYSTEM_HEADER to the path of this file.
+ */
+#ifdef PB_SYSTEM_HEADER
+#include PB_SYSTEM_HEADER
+#else
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <limits.h>
+
+#ifdef PB_ENABLE_MALLOC
+#include <stdlib.h>
+#endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Macro for defining packed structures (compiler dependent).
+ * This just reduces memory requirements, but is not required.
+ */
+#if defined(PB_NO_PACKED_STRUCTS)
+    /* Disable struct packing */
+#   define PB_PACKED_STRUCT_START
+#   define PB_PACKED_STRUCT_END
+#   define pb_packed
+#elif defined(__GNUC__) || defined(__clang__)
+    /* For GCC and clang */
+#   define PB_PACKED_STRUCT_START
+#   define PB_PACKED_STRUCT_END
+#   define pb_packed __attribute__((packed))
+#elif defined(__ICCARM__) || defined(__CC_ARM)
+    /* For IAR ARM and Keil MDK-ARM compilers */
+#   define PB_PACKED_STRUCT_START _Pragma("pack(push, 1)")
+#   define PB_PACKED_STRUCT_END _Pragma("pack(pop)")
+#   define pb_packed
+#elif defined(_MSC_VER) && (_MSC_VER >= 1500)
+    /* For Microsoft Visual C++ */
+#   define PB_PACKED_STRUCT_START __pragma(pack(push, 1))
+#   define PB_PACKED_STRUCT_END __pragma(pack(pop))
+#   define pb_packed
+#else
+    /* Unknown compiler */
+#   define PB_PACKED_STRUCT_START
+#   define PB_PACKED_STRUCT_END
+#   define pb_packed
+#endif
+
+/* Handy macro for suppressing unreferenced-parameter compiler warnings. */
+#ifndef PB_UNUSED
+#define PB_UNUSED(x) (void)(x)
+#endif
+
+/* Harvard-architecture processors may need special attributes for storing
+ * field information in program memory. */
+#ifndef PB_PROGMEM
+#ifdef __AVR__
+#include <avr/pgmspace.h>
+#define PB_PROGMEM             PROGMEM
+#define PB_PROGMEM_READU32(x)  pgm_read_dword(&x)
+#else
+#define PB_PROGMEM
+#define PB_PROGMEM_READU32(x)  (x)
+#endif
+#endif
+
+/* Compile-time assertion, used for checking compatible compilation options.
+ * If this does not work properly on your compiler, use
+ * #define PB_NO_STATIC_ASSERT to disable it.
+ *
+ * But before doing that, check carefully the error message / place where it
+ * comes from to see if the error has a real cause. Unfortunately the error
+ * message is not always very clear to read, but you can see the reason better
+ * in the place where the PB_STATIC_ASSERT macro was called.
+ */
+#ifndef PB_NO_STATIC_ASSERT
+#  ifndef PB_STATIC_ASSERT
+#    if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
+       /* C11 standard _Static_assert mechanism */
+#      define PB_STATIC_ASSERT(COND,MSG) _Static_assert(COND,#MSG);
+#    else
+       /* Classic negative-size-array static assert mechanism */
+#      define PB_STATIC_ASSERT(COND,MSG) typedef char PB_STATIC_ASSERT_MSG(MSG, __LINE__, __COUNTER__)[(COND)?1:-1];
+#      define PB_STATIC_ASSERT_MSG(MSG, LINE, COUNTER) PB_STATIC_ASSERT_MSG_(MSG, LINE, COUNTER)
+#      define PB_STATIC_ASSERT_MSG_(MSG, LINE, COUNTER) pb_static_assertion_##MSG##_##LINE##_##COUNTER
+#    endif
+#  endif
+#else
+   /* Static asserts disabled by PB_NO_STATIC_ASSERT */
+#  define PB_STATIC_ASSERT(COND,MSG)
+#endif
+
+/* Number of required fields to keep track of. */
+#ifndef PB_MAX_REQUIRED_FIELDS
+#define PB_MAX_REQUIRED_FIELDS 64
+#endif
+
+#if PB_MAX_REQUIRED_FIELDS < 64
+#error You should not lower PB_MAX_REQUIRED_FIELDS from the default value (64).
+#endif
+
+#ifdef PB_WITHOUT_64BIT
+#ifdef PB_CONVERT_DOUBLE_FLOAT
+/* Cannot use doubles without 64-bit types */
+#undef PB_CONVERT_DOUBLE_FLOAT
+#endif
+#endif
+
+/* List of possible field types. These are used in the autogenerated code.
+ * Least-significant 4 bits tell the scalar type
+ * Most-significant 4 bits specify repeated/required/packed etc.
+ */
+
+typedef uint_least8_t pb_type_t;
+
+/**** Field data types ****/
+
+/* Numeric types */
+#define PB_LTYPE_BOOL    0x00U /* bool */
+#define PB_LTYPE_VARINT  0x01U /* int32, int64, enum, bool */
+#define PB_LTYPE_UVARINT 0x02U /* uint32, uint64 */
+#define PB_LTYPE_SVARINT 0x03U /* sint32, sint64 */
+#define PB_LTYPE_FIXED32 0x04U /* fixed32, sfixed32, float */
+#define PB_LTYPE_FIXED64 0x05U /* fixed64, sfixed64, double */
+
+/* Marker for last packable field type. */
+#define PB_LTYPE_LAST_PACKABLE 0x05U
+
+/* Byte array with pre-allocated buffer.
+ * data_size is the length of the allocated PB_BYTES_ARRAY structure. */
+#define PB_LTYPE_BYTES 0x06U
+
+/* String with pre-allocated buffer.
+ * data_size is the maximum length. */
+#define PB_LTYPE_STRING 0x07U
+
+/* Submessage
+ * submsg_fields is pointer to field descriptions */
+#define PB_LTYPE_SUBMESSAGE 0x08U
+
+/* Submessage with pre-decoding callback
+ * The pre-decoding callback is stored as pb_callback_t right before pSize.
+ * submsg_fields is pointer to field descriptions */
+#define PB_LTYPE_SUBMSG_W_CB 0x09U
+
+/* Extension pseudo-field
+ * The field contains a pointer to pb_extension_t */
+#define PB_LTYPE_EXTENSION 0x0AU
+
+/* Byte array with inline, pre-allocated buffer.
+ * data_size is the length of the inline, allocated buffer.
+ * This differs from PB_LTYPE_BYTES by defining the element as
+ * pb_byte_t[data_size] rather than pb_bytes_array_t. */
+#define PB_LTYPE_FIXED_LENGTH_BYTES 0x0BU
+
+/* Number of declared LTYPES */
+#define PB_LTYPES_COUNT 0x0CU
+#define PB_LTYPE_MASK 0x0FU
+
+/**** Field repetition rules ****/
+
+#define PB_HTYPE_REQUIRED 0x00U
+#define PB_HTYPE_OPTIONAL 0x10U
+#define PB_HTYPE_SINGULAR 0x10U
+#define PB_HTYPE_REPEATED 0x20U
+#define PB_HTYPE_FIXARRAY 0x20U
+#define PB_HTYPE_ONEOF    0x30U
+#define PB_HTYPE_MASK     0x30U
+
+/**** Field allocation types ****/
+ 
+#define PB_ATYPE_STATIC   0x00U
+#define PB_ATYPE_POINTER  0x80U
+#define PB_ATYPE_CALLBACK 0x40U
+#define PB_ATYPE_MASK     0xC0U
+
+#define PB_ATYPE(x) ((x) & PB_ATYPE_MASK)
+#define PB_HTYPE(x) ((x) & PB_HTYPE_MASK)
+#define PB_LTYPE(x) ((x) & PB_LTYPE_MASK)
+#define PB_LTYPE_IS_SUBMSG(x) (PB_LTYPE(x) == PB_LTYPE_SUBMESSAGE || \
+                               PB_LTYPE(x) == PB_LTYPE_SUBMSG_W_CB)
+
+/* Data type used for storing sizes of struct fields
+ * and array counts.
+ */
+#if defined(PB_FIELD_32BIT)
+    typedef uint32_t pb_size_t;
+    typedef int32_t pb_ssize_t;
+#else
+    typedef uint_least16_t pb_size_t;
+    typedef int_least16_t pb_ssize_t;
+#endif
+#define PB_SIZE_MAX ((pb_size_t)-1)
+
+/* Data type for storing encoded data and other byte streams.
+ * This typedef exists to support platforms where uint8_t does not exist.
+ * You can regard it as equivalent to uint8_t on other platforms.
+ */
+typedef uint_least8_t pb_byte_t;
+
+/* Forward declaration of struct types */
+typedef struct pb_istream_s pb_istream_t;
+typedef struct pb_ostream_s pb_ostream_t;
+typedef struct pb_field_iter_s pb_field_iter_t;
+
+/* This structure is used in auto-generated constants
+ * to specify struct fields.
+ */
+typedef struct pb_msgdesc_s pb_msgdesc_t;
+struct pb_msgdesc_s {
+    const uint32_t *field_info;
+    const pb_msgdesc_t * const * submsg_info;
+    const pb_byte_t *default_value;
+
+    bool (*field_callback)(pb_istream_t *istream, pb_ostream_t *ostream, const pb_field_iter_t *field);
+
+    pb_size_t field_count;
+    pb_size_t required_field_count;
+    pb_size_t largest_tag;
+};
+
+/* Iterator for message descriptor */
+struct pb_field_iter_s {
+    const pb_msgdesc_t *descriptor;  /* Pointer to message descriptor constant */
+    void *message;                   /* Pointer to start of the structure */
+
+    pb_size_t index;                 /* Index of the field */
+    pb_size_t field_info_index;      /* Index to descriptor->field_info array */
+    pb_size_t required_field_index;  /* Index that counts only the required fields */
+    pb_size_t submessage_index;      /* Index that counts only submessages */
+
+    pb_size_t tag;                   /* Tag of current field */
+    pb_size_t data_size;             /* sizeof() of a single item */
+    pb_size_t array_size;            /* Number of array entries */
+    pb_type_t type;                  /* Type of current field */
+
+    void *pField;                    /* Pointer to current field in struct */
+    void *pData;                     /* Pointer to current data contents. Different than pField for arrays and pointers. */
+    void *pSize;                     /* Pointer to count/has field */
+
+    const pb_msgdesc_t *submsg_desc; /* For submessage fields, pointer to field descriptor for the submessage. */
+};
+
+/* For compatibility with legacy code */
+typedef pb_field_iter_t pb_field_t;
+
+/* Make sure that the standard integer types are of the expected sizes.
+ * Otherwise fixed32/fixed64 fields can break.
+ *
+ * If you get errors here, it probably means that your stdint.h is not
+ * correct for your platform.
+ */
+#ifndef PB_WITHOUT_64BIT
+PB_STATIC_ASSERT(sizeof(int64_t) == 2 * sizeof(int32_t), INT64_T_WRONG_SIZE)
+PB_STATIC_ASSERT(sizeof(uint64_t) == 2 * sizeof(uint32_t), UINT64_T_WRONG_SIZE)
+#endif
+
+/* Storage for 'bytes' fields: the number of bytes first, and after that the data array.
+ * PB_BYTES_ARRAY_T(n) declares a struct with room for n bytes; pb_bytes_array_t is the generic view of it.
+ * PB_BYTES_ARRAY_T_ALLOCSIZE(n) is the allocation size, in bytes, of an array holding n data bytes;
+ * n is parenthesized so that any expression (e.g. a conditional) can be passed safely. */
+#define PB_BYTES_ARRAY_T(n) struct { pb_size_t size; pb_byte_t bytes[n]; }
+#define PB_BYTES_ARRAY_T_ALLOCSIZE(n) ((size_t)(n) + offsetof(pb_bytes_array_t, bytes))
+
+struct pb_bytes_array_s {
+    pb_size_t size;
+    pb_byte_t bytes[1];
+};
+typedef struct pb_bytes_array_s pb_bytes_array_t;
+
+/* This structure is used for giving the callback function.
+ * It is stored in the message structure and filled in by the method that
+ * calls pb_decode.
+ *
+ * The decoding callback will be given a limited-length stream
+ * If the wire type was string, the length is the length of the string.
+ * If the wire type was a varint/fixed32/fixed64, the length is the length
+ * of the actual value.
+ * The function may be called multiple times (especially for repeated types,
+ * but also otherwise if the message happens to contain the field multiple
+ * times.)
+ *
+ * The encoding callback will receive the actual output stream.
+ * It should write all the data in one call, including the field tag and
+ * wire type. It can write multiple fields.
+ *
+ * The callback can be null if you want to skip a field.
+ */
+typedef struct pb_callback_s pb_callback_t;
+struct pb_callback_s {
+    /* Callback functions receive a pointer to the arg field and can read its
+     * value as *arg; only the decode signature lets the callback replace it.
+     * Both pointers share storage in a union, so only one is valid at a time. */
+    union {
+        bool (*decode)(pb_istream_t *stream, const pb_field_t *field, void **arg);
+        bool (*encode)(pb_ostream_t *stream, const pb_field_t *field, void * const *arg);
+    } funcs;
+    
+    /* Free arg for use by callback */
+    void *arg;
+};
+
+extern bool pb_default_field_callback(pb_istream_t *istream, pb_ostream_t *ostream, const pb_field_t *field);
+
+/* Wire types. Library user needs these only in encoder callbacks. Values 3 and 4 are not defined here (presumably the protobuf group markers - confirm against the encoding spec). */
+typedef enum {
+    PB_WT_VARINT = 0,
+    PB_WT_64BIT  = 1,
+    PB_WT_STRING = 2,
+    PB_WT_32BIT  = 5
+} pb_wire_type_t;
+
+/* Structure for defining the handling of unknown/extension fields.
+ * Usually the pb_extension_type_t structure is automatically generated,
+ * while the pb_extension_t structure is created by the user. However,
+ * if you want to catch all unknown fields, you can also create a custom
+ * pb_extension_type_t with your own callback.
+ */
+typedef struct pb_extension_type_s pb_extension_type_t;
+typedef struct pb_extension_s pb_extension_t; /* Declared up front: the decode/encode callbacks below take a pb_extension_t */
+struct pb_extension_type_s {
+    /* Called for each unknown field in the message.
+     * If you handle the field, read off all of its data and return true.
+     * If you do not handle the field, do not read anything and return true.
+     * If you run into an error, return false.
+     * Set to NULL for default handler.
+     */
+    bool (*decode)(pb_istream_t *stream, pb_extension_t *extension,
+                   uint32_t tag, pb_wire_type_t wire_type);
+    
+    /* Called once after all regular fields have been encoded.
+     * If you have something to write, do so and return true.
+     * If you do not have anything to write, just return true.
+     * If you run into an error, return false.
+     * Set to NULL for default handler.
+     */
+    bool (*encode)(pb_ostream_t *stream, const pb_extension_t *extension);
+    
+    /* Free field for use by the callback. */
+    const void *arg;
+};
+
+struct pb_extension_s {
+    /* Type describing the extension field. Usually you'll initialize
+     * this to a pointer to the automatically generated structure. */
+    const pb_extension_type_t *type;
+    
+    /* Destination for the decoded data. This must match the datatype
+     * of the extension field. */
+    void *dest;
+    
+    /* Pointer to the next extension handler, or NULL.
+     * If this extension does not match a field, the next handler is
+     * automatically called. */
+    pb_extension_t *next;
+
+    /* The decoder sets this to true if the extension was found.
+     * Ignored for encoding. */
+    bool found;
+};
+
+#define pb_extension_init_zero {NULL,NULL,NULL,false} /* Positional initializer: type, dest, next, found */
+
+/* Memory allocation functions to use, defined only when PB_ENABLE_MALLOC is set. They default to
+ * realloc() and free(); define pb_realloc and pb_free beforehand to substitute custom functions. */
+#ifdef PB_ENABLE_MALLOC
+#   ifndef pb_realloc
+#       define pb_realloc(ptr, size) realloc(ptr, size)
+#   endif
+#   ifndef pb_free
+#       define pb_free(ptr) free(ptr)
+#   endif
+#endif
+
+/* This is used to inform about need to regenerate .pb.h/.pb.c files. */
+#define PB_PROTO_HEADER_VERSION 40
+
+/* These macros are used to declare pb_field_t's in the constant array. */
+/* Size of a structure member, in bytes. The null pointer is only a sizeof operand and is never dereferenced. */
+#define pb_membersize(st, m) (sizeof ((st*)0)->m)
+/* Number of entries in an array member: whole size divided by the size of element 0, so m must have array type. */
+#define pb_arraysize(st, m) (pb_membersize(st, m) / pb_membersize(st, m[0]))
+/* Delta from start of one member to the start of another member (offset of m1 minus offset of m2, as a signed int). */
+#define pb_delta(st, m1, m2) ((int)offsetof(st, m1) - (int)offsetof(st, m2))
+
+/* Force expansion of macro value */
+#define PB_EXPAND(x) x
+
+/* Binding of a message field set into a specific structure: defines <structname>_field_info[] (0-terminated), <structname>_submsg_info[] (NULL-terminated) and the <structname>_msg descriptor, then emits the static asserts for the chosen width. */
+#define PB_BIND(msgname, structname, width) \
+    const uint32_t structname ## _field_info[] PB_PROGMEM = \
+    { \
+        msgname ## _FIELDLIST(PB_GEN_FIELD_INFO_ ## width, structname) \
+        0 \
+    }; \
+    const pb_msgdesc_t* const structname ## _submsg_info[] = \
+    { \
+        msgname ## _FIELDLIST(PB_GEN_SUBMSG_INFO, structname) \
+        NULL \
+    }; \
+    const pb_msgdesc_t structname ## _msg = \
+    { \
+       structname ## _field_info, \
+       structname ## _submsg_info, \
+       msgname ## _DEFAULT, \
+       msgname ## _CALLBACK, \
+       0 msgname ## _FIELDLIST(PB_GEN_FIELD_COUNT, structname), \
+       0 msgname ## _FIELDLIST(PB_GEN_REQ_FIELD_COUNT, structname), \
+       0 msgname ## _FIELDLIST(PB_GEN_LARGEST_TAG, structname), \
+    }; \
+    msgname ## _FIELDLIST(PB_GEN_FIELD_INFO_ASSERT_ ## width, structname)
+
+#define PB_GEN_FIELD_COUNT(structname, atype, htype, ltype, fieldname, tag) +1 /* PB_BIND() prefixes a 0, giving "0 +1 +1 ...": one per field */
+#define PB_GEN_REQ_FIELD_COUNT(structname, atype, htype, ltype, fieldname, tag) \
+    + (PB_HTYPE_ ## htype == PB_HTYPE_REQUIRED) /* adds 1 for each field whose htype is REQUIRED */
+#define PB_GEN_LARGEST_TAG(structname, atype, htype, ltype, fieldname, tag) \
+    * 0 + tag /* "0 * 0 + t1 * 0 + t2 ..." evaluates to the tag of the last field listed */
+
+/* X-macros for generating the entries in struct_field_info[] array. The _1/_2/_4/_8 variants differ only in the PB_FIELDINFO_<width> encoder they call; _AUTO picks the width per field via PB_FIELDINFO_WIDTH_AUTO. */
+#define PB_GEN_FIELD_INFO_1(structname, atype, htype, ltype, fieldname, tag) \
+    PB_FIELDINFO_1(tag, PB_ATYPE_ ## atype | PB_HTYPE_ ## htype | PB_LTYPE_MAP_ ## ltype, \
+                   PB_DATA_OFFSET_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_DATA_SIZE_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_SIZE_OFFSET_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_ARRAY_SIZE_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname))
+
+#define PB_GEN_FIELD_INFO_2(structname, atype, htype, ltype, fieldname, tag) \
+    PB_FIELDINFO_2(tag, PB_ATYPE_ ## atype | PB_HTYPE_ ## htype | PB_LTYPE_MAP_ ## ltype, \
+                   PB_DATA_OFFSET_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_DATA_SIZE_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_SIZE_OFFSET_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_ARRAY_SIZE_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname))
+
+#define PB_GEN_FIELD_INFO_4(structname, atype, htype, ltype, fieldname, tag) \
+    PB_FIELDINFO_4(tag, PB_ATYPE_ ## atype | PB_HTYPE_ ## htype | PB_LTYPE_MAP_ ## ltype, \
+                   PB_DATA_OFFSET_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_DATA_SIZE_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_SIZE_OFFSET_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_ARRAY_SIZE_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname))
+
+#define PB_GEN_FIELD_INFO_8(structname, atype, htype, ltype, fieldname, tag) \
+    PB_FIELDINFO_8(tag, PB_ATYPE_ ## atype | PB_HTYPE_ ## htype | PB_LTYPE_MAP_ ## ltype, \
+                   PB_DATA_OFFSET_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_DATA_SIZE_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_SIZE_OFFSET_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_ARRAY_SIZE_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname))
+
+#define PB_GEN_FIELD_INFO_AUTO(structname, atype, htype, ltype, fieldname, tag) \
+    PB_FIELDINFO_AUTO2(PB_FIELDINFO_WIDTH_AUTO(_PB_ATYPE_ ## atype, _PB_HTYPE_ ## htype, _PB_LTYPE_ ## ltype), \
+                   tag, PB_ATYPE_ ## atype | PB_HTYPE_ ## htype | PB_LTYPE_MAP_ ## ltype, \
+                   PB_DATA_OFFSET_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_DATA_SIZE_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_SIZE_OFFSET_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_ARRAY_SIZE_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname))
+
+#define PB_FIELDINFO_AUTO2(width, tag, type, data_offset, data_size, size_offset, array_size) \
+    PB_FIELDINFO_AUTO3(width, tag, type, data_offset, data_size, size_offset, array_size) /* no ## here, so 'width' is macro-expanded to its number first */
+
+#define PB_FIELDINFO_AUTO3(width, tag, type, data_offset, data_size, size_offset, array_size) \
+    PB_FIELDINFO_ ## width(tag, type, data_offset, data_size, size_offset, array_size) /* pastes the already-expanded width onto PB_FIELDINFO_ */
+
+/* X-macro for generating asserts that entries fit in struct_field_info[] array.
+ * The structure of macros here must match the structure above in PB_GEN_FIELD_INFO_x(),
+ * but it is not easily reused because of how macro substitutions work. PB_BIND() invokes these after the descriptor definition. */
+#define PB_GEN_FIELD_INFO_ASSERT_1(structname, atype, htype, ltype, fieldname, tag) \
+    PB_FIELDINFO_ASSERT_1(tag, PB_ATYPE_ ## atype | PB_HTYPE_ ## htype | PB_LTYPE_MAP_ ## ltype, \
+                   PB_DATA_OFFSET_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_DATA_SIZE_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_SIZE_OFFSET_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_ARRAY_SIZE_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname))
+
+#define PB_GEN_FIELD_INFO_ASSERT_2(structname, atype, htype, ltype, fieldname, tag) \
+    PB_FIELDINFO_ASSERT_2(tag, PB_ATYPE_ ## atype | PB_HTYPE_ ## htype | PB_LTYPE_MAP_ ## ltype, \
+                   PB_DATA_OFFSET_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_DATA_SIZE_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_SIZE_OFFSET_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_ARRAY_SIZE_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname))
+
+#define PB_GEN_FIELD_INFO_ASSERT_4(structname, atype, htype, ltype, fieldname, tag) \
+    PB_FIELDINFO_ASSERT_4(tag, PB_ATYPE_ ## atype | PB_HTYPE_ ## htype | PB_LTYPE_MAP_ ## ltype, \
+                   PB_DATA_OFFSET_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_DATA_SIZE_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_SIZE_OFFSET_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_ARRAY_SIZE_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname))
+
+#define PB_GEN_FIELD_INFO_ASSERT_8(structname, atype, htype, ltype, fieldname, tag) \
+    PB_FIELDINFO_ASSERT_8(tag, PB_ATYPE_ ## atype | PB_HTYPE_ ## htype | PB_LTYPE_MAP_ ## ltype, \
+                   PB_DATA_OFFSET_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_DATA_SIZE_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_SIZE_OFFSET_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_ARRAY_SIZE_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname))
+
+#define PB_GEN_FIELD_INFO_ASSERT_AUTO(structname, atype, htype, ltype, fieldname, tag) \
+    PB_FIELDINFO_ASSERT_AUTO2(PB_FIELDINFO_WIDTH_AUTO(_PB_ATYPE_ ## atype, _PB_HTYPE_ ## htype, _PB_LTYPE_ ## ltype), \
+                   tag, PB_ATYPE_ ## atype | PB_HTYPE_ ## htype | PB_LTYPE_MAP_ ## ltype, \
+                   PB_DATA_OFFSET_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_DATA_SIZE_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_SIZE_OFFSET_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname), \
+                   PB_ARRAY_SIZE_ ## atype(_PB_HTYPE_ ## htype, structname, fieldname))
+
+#define PB_FIELDINFO_ASSERT_AUTO2(width, tag, type, data_offset, data_size, size_offset, array_size) \
+    PB_FIELDINFO_ASSERT_AUTO3(width, tag, type, data_offset, data_size, size_offset, array_size) /* no ## here, so 'width' is macro-expanded to its number first */
+
+#define PB_FIELDINFO_ASSERT_AUTO3(width, tag, type, data_offset, data_size, size_offset, array_size) \
+    PB_FIELDINFO_ASSERT_ ## width(tag, type, data_offset, data_size, size_offset, array_size) /* pastes the already-expanded width onto PB_FIELDINFO_ASSERT_ */
+
+#define PB_DATA_OFFSET_STATIC(htype, structname, fieldname) PB_DO ## htype(structname, fieldname) /* htype arrives as _PB_HTYPE_x, so this pastes to PB_DO_PB_HTYPE_x */
+#define PB_DATA_OFFSET_POINTER(htype, structname, fieldname) PB_DO ## htype(structname, fieldname)
+#define PB_DATA_OFFSET_CALLBACK(htype, structname, fieldname) PB_DO ## htype(structname, fieldname)
+#define PB_DO_PB_HTYPE_REQUIRED(structname, fieldname) offsetof(structname, fieldname)
+#define PB_DO_PB_HTYPE_SINGULAR(structname, fieldname) offsetof(structname, fieldname)
+#define PB_DO_PB_HTYPE_ONEOF(structname, fieldname) offsetof(structname, PB_ONEOF_NAME(FULL, fieldname)) /* for oneofs, fieldname is a (unionname,membername,fullname) tuple */
+#define PB_DO_PB_HTYPE_OPTIONAL(structname, fieldname) offsetof(structname, fieldname)
+#define PB_DO_PB_HTYPE_REPEATED(structname, fieldname) offsetof(structname, fieldname)
+#define PB_DO_PB_HTYPE_FIXARRAY(structname, fieldname) offsetof(structname, fieldname)
+
+#define PB_SIZE_OFFSET_STATIC(htype, structname, fieldname) PB_SO ## htype(structname, fieldname) /* field offset minus the offset of its has_/_count/which_ companion member; 0 when no companion is used */
+#define PB_SIZE_OFFSET_POINTER(htype, structname, fieldname) PB_SO_PTR ## htype(structname, fieldname)
+#define PB_SIZE_OFFSET_CALLBACK(htype, structname, fieldname) PB_SO_CB ## htype(structname, fieldname)
+#define PB_SO_PB_HTYPE_REQUIRED(structname, fieldname) 0
+#define PB_SO_PB_HTYPE_SINGULAR(structname, fieldname) 0
+#define PB_SO_PB_HTYPE_ONEOF(structname, fieldname) PB_SO_PB_HTYPE_ONEOF2(structname, PB_ONEOF_NAME(FULL, fieldname), PB_ONEOF_NAME(UNION, fieldname))
+#define PB_SO_PB_HTYPE_ONEOF2(structname, fullname, unionname) PB_SO_PB_HTYPE_ONEOF3(structname, fullname, unionname)
+#define PB_SO_PB_HTYPE_ONEOF3(structname, fullname, unionname) pb_delta(structname, fullname, which_ ## unionname)
+#define PB_SO_PB_HTYPE_OPTIONAL(structname, fieldname) pb_delta(structname, fieldname, has_ ## fieldname)
+#define PB_SO_PB_HTYPE_REPEATED(structname, fieldname) pb_delta(structname, fieldname, fieldname ## _count)
+#define PB_SO_PB_HTYPE_FIXARRAY(structname, fieldname) 0
+#define PB_SO_PTR_PB_HTYPE_REQUIRED(structname, fieldname) 0
+#define PB_SO_PTR_PB_HTYPE_SINGULAR(structname, fieldname) 0
+#define PB_SO_PTR_PB_HTYPE_ONEOF(structname, fieldname) PB_SO_PB_HTYPE_ONEOF(structname, fieldname)
+#define PB_SO_PTR_PB_HTYPE_OPTIONAL(structname, fieldname) 0 /* pointer fields: no has_ member is referenced */
+#define PB_SO_PTR_PB_HTYPE_REPEATED(structname, fieldname) PB_SO_PB_HTYPE_REPEATED(structname, fieldname)
+#define PB_SO_PTR_PB_HTYPE_FIXARRAY(structname, fieldname) 0
+#define PB_SO_CB_PB_HTYPE_REQUIRED(structname, fieldname) 0
+#define PB_SO_CB_PB_HTYPE_SINGULAR(structname, fieldname) 0
+#define PB_SO_CB_PB_HTYPE_ONEOF(structname, fieldname) PB_SO_PB_HTYPE_ONEOF(structname, fieldname)
+#define PB_SO_CB_PB_HTYPE_OPTIONAL(structname, fieldname) 0
+#define PB_SO_CB_PB_HTYPE_REPEATED(structname, fieldname) 0 /* callback fields: no _count member is referenced */
+#define PB_SO_CB_PB_HTYPE_FIXARRAY(structname, fieldname) 0
+
+#define PB_ARRAY_SIZE_STATIC(htype, structname, fieldname) PB_AS ## htype(structname, fieldname) /* number of array entries: 1 except for static REPEATED/FIXARRAY fields and pointer FIXARRAY fields */
+#define PB_ARRAY_SIZE_POINTER(htype, structname, fieldname) PB_AS_PTR ## htype(structname, fieldname)
+#define PB_ARRAY_SIZE_CALLBACK(htype, structname, fieldname) 1
+#define PB_AS_PB_HTYPE_REQUIRED(structname, fieldname) 1
+#define PB_AS_PB_HTYPE_SINGULAR(structname, fieldname) 1
+#define PB_AS_PB_HTYPE_OPTIONAL(structname, fieldname) 1
+#define PB_AS_PB_HTYPE_ONEOF(structname, fieldname) 1
+#define PB_AS_PB_HTYPE_REPEATED(structname, fieldname) pb_arraysize(structname, fieldname)
+#define PB_AS_PB_HTYPE_FIXARRAY(structname, fieldname) pb_arraysize(structname, fieldname)
+#define PB_AS_PTR_PB_HTYPE_REQUIRED(structname, fieldname) 1
+#define PB_AS_PTR_PB_HTYPE_SINGULAR(structname, fieldname) 1
+#define PB_AS_PTR_PB_HTYPE_OPTIONAL(structname, fieldname) 1
+#define PB_AS_PTR_PB_HTYPE_ONEOF(structname, fieldname) 1
+#define PB_AS_PTR_PB_HTYPE_REPEATED(structname, fieldname) 1
+#define PB_AS_PTR_PB_HTYPE_FIXARRAY(structname, fieldname) pb_arraysize(structname, fieldname[0]) /* entry count of the array that the member points to */
+
+#define PB_DATA_SIZE_STATIC(htype, structname, fieldname) PB_DS ## htype(structname, fieldname) /* sizeof() of one item: the element for arrays, the pointed-to object for pointer fields, the member itself for callbacks */
+#define PB_DATA_SIZE_POINTER(htype, structname, fieldname) PB_DS_PTR ## htype(structname, fieldname)
+#define PB_DATA_SIZE_CALLBACK(htype, structname, fieldname) PB_DS_CB ## htype(structname, fieldname)
+#define PB_DS_PB_HTYPE_REQUIRED(structname, fieldname) pb_membersize(structname, fieldname)
+#define PB_DS_PB_HTYPE_SINGULAR(structname, fieldname) pb_membersize(structname, fieldname)
+#define PB_DS_PB_HTYPE_OPTIONAL(structname, fieldname) pb_membersize(structname, fieldname)
+#define PB_DS_PB_HTYPE_ONEOF(structname, fieldname) pb_membersize(structname, PB_ONEOF_NAME(FULL, fieldname))
+#define PB_DS_PB_HTYPE_REPEATED(structname, fieldname) pb_membersize(structname, fieldname[0])
+#define PB_DS_PB_HTYPE_FIXARRAY(structname, fieldname) pb_membersize(structname, fieldname[0])
+#define PB_DS_PTR_PB_HTYPE_REQUIRED(structname, fieldname) pb_membersize(structname, fieldname[0])
+#define PB_DS_PTR_PB_HTYPE_SINGULAR(structname, fieldname) pb_membersize(structname, fieldname[0])
+#define PB_DS_PTR_PB_HTYPE_OPTIONAL(structname, fieldname) pb_membersize(structname, fieldname[0])
+#define PB_DS_PTR_PB_HTYPE_ONEOF(structname, fieldname) pb_membersize(structname, PB_ONEOF_NAME(FULL, fieldname)[0])
+#define PB_DS_PTR_PB_HTYPE_REPEATED(structname, fieldname) pb_membersize(structname, fieldname[0])
+#define PB_DS_PTR_PB_HTYPE_FIXARRAY(structname, fieldname) pb_membersize(structname, fieldname[0][0])
+#define PB_DS_CB_PB_HTYPE_REQUIRED(structname, fieldname) pb_membersize(structname, fieldname)
+#define PB_DS_CB_PB_HTYPE_SINGULAR(structname, fieldname) pb_membersize(structname, fieldname)
+#define PB_DS_CB_PB_HTYPE_OPTIONAL(structname, fieldname) pb_membersize(structname, fieldname)
+#define PB_DS_CB_PB_HTYPE_ONEOF(structname, fieldname) pb_membersize(structname, PB_ONEOF_NAME(FULL, fieldname))
+#define PB_DS_CB_PB_HTYPE_REPEATED(structname, fieldname) pb_membersize(structname, fieldname)
+#define PB_DS_CB_PB_HTYPE_FIXARRAY(structname, fieldname) pb_membersize(structname, fieldname)
+
+#define PB_ONEOF_NAME(type, tuple) PB_EXPAND(PB_ONEOF_NAME_ ## type tuple) /* tuple is "(unionname,membername,fullname)"; type (UNION, MEMBER or FULL) selects which element is returned */
+#define PB_ONEOF_NAME_UNION(unionname,membername,fullname) unionname
+#define PB_ONEOF_NAME_MEMBER(unionname,membername,fullname) membername
+#define PB_ONEOF_NAME_FULL(unionname,membername,fullname) fullname
+
+#define PB_GEN_SUBMSG_INFO(structname, atype, htype, ltype, fieldname, tag) \
+    PB_SUBMSG_INFO_ ## htype(_PB_LTYPE_ ## ltype, structname, fieldname) /* one call per field; expands to nothing unless the ltype is MESSAGE or MSG_W_CB */
+
+#define PB_SUBMSG_INFO_REQUIRED(ltype, structname, fieldname) PB_SI ## ltype(structname ## _ ## fieldname ## _MSGTYPE)
+#define PB_SUBMSG_INFO_SINGULAR(ltype, structname, fieldname) PB_SI ## ltype(structname ## _ ## fieldname ## _MSGTYPE)
+#define PB_SUBMSG_INFO_OPTIONAL(ltype, structname, fieldname) PB_SI ## ltype(structname ## _ ## fieldname ## _MSGTYPE)
+#define PB_SUBMSG_INFO_ONEOF(ltype, structname, fieldname) PB_SUBMSG_INFO_ONEOF2(ltype, structname, PB_ONEOF_NAME(UNION, fieldname), PB_ONEOF_NAME(MEMBER, fieldname))
+#define PB_SUBMSG_INFO_ONEOF2(ltype, structname, unionname, membername) PB_SUBMSG_INFO_ONEOF3(ltype, structname, unionname, membername)
+#define PB_SUBMSG_INFO_ONEOF3(ltype, structname, unionname, membername) PB_SI ## ltype(structname ## _ ## unionname ## _ ## membername ## _MSGTYPE)
+#define PB_SUBMSG_INFO_REPEATED(ltype, structname, fieldname) PB_SI ## ltype(structname ## _ ## fieldname ## _MSGTYPE)
+#define PB_SUBMSG_INFO_FIXARRAY(ltype, structname, fieldname) PB_SI ## ltype(structname ## _ ## fieldname ## _MSGTYPE)
+#define PB_SI_PB_LTYPE_BOOL(t)
+#define PB_SI_PB_LTYPE_BYTES(t)
+#define PB_SI_PB_LTYPE_DOUBLE(t)
+#define PB_SI_PB_LTYPE_ENUM(t)
+#define PB_SI_PB_LTYPE_UENUM(t)
+#define PB_SI_PB_LTYPE_FIXED32(t)
+#define PB_SI_PB_LTYPE_FIXED64(t)
+#define PB_SI_PB_LTYPE_FLOAT(t)
+#define PB_SI_PB_LTYPE_INT32(t)
+#define PB_SI_PB_LTYPE_INT64(t)
+#define PB_SI_PB_LTYPE_MESSAGE(t)  PB_SUBMSG_DESCRIPTOR(t)
+#define PB_SI_PB_LTYPE_MSG_W_CB(t) PB_SUBMSG_DESCRIPTOR(t)
+#define PB_SI_PB_LTYPE_SFIXED32(t)
+#define PB_SI_PB_LTYPE_SFIXED64(t)
+#define PB_SI_PB_LTYPE_SINT32(t)
+#define PB_SI_PB_LTYPE_SINT64(t)
+#define PB_SI_PB_LTYPE_STRING(t)
+#define PB_SI_PB_LTYPE_UINT32(t)
+#define PB_SI_PB_LTYPE_UINT64(t)
+#define PB_SI_PB_LTYPE_EXTENSION(t)
+#define PB_SI_PB_LTYPE_FIXED_LENGTH_BYTES(t)
+#define PB_SUBMSG_DESCRIPTOR(t)    &(t ## _msg), /* trailing comma: this is one entry of the _submsg_info[] initializer built by PB_BIND() */
+
+/* The field descriptors use a variable width format, with width of either
+ * 1, 2, 4 or 8 of 32-bit words. The two lowest bytes of the first word always
+ * encode the descriptor size, 6 lowest bits of field tag number, and 8 bits
+ * of the field type.
+ *
+ * Descriptor size is encoded as 0 = 1 word, 1 = 2 words, 2 = 4 words, 3 = 8 words.
+ *
+ * Formats, listed starting with the least significant bit of the first word.
+ * 1 word:  [2-bit len] [6-bit tag] [8-bit type] [8-bit data_offset] [4-bit size_offset] [4-bit data_size]
+ *
+ * 2 words: [2-bit len] [6-bit tag] [8-bit type] [12-bit array_size] [4-bit size_offset]
+ *          [16-bit data_offset] [12-bit data_size] [4-bit tag>>6]
+ *
+ * 4 words: [2-bit len] [6-bit tag] [8-bit type] [16-bit array_size]
+ *          [8-bit size_offset] [24-bit tag>>6]
+ *          [32-bit data_offset]
+ *          [32-bit data_size]
+ *
+ * 8 words: [2-bit len] [6-bit tag] [8-bit type] [16-bit reserved]
+ *          [8-bit size_offset] [24-bit tag>>6]
+ *          [32-bit data_offset]
+ *          [32-bit data_size]
+ *          [32-bit array_size]
+ *          [32-bit reserved]
+ *          [32-bit reserved]
+ *          [32-bit reserved]
+ */
+
+#define PB_FIELDINFO_1(tag, type, data_offset, data_size, size_offset, array_size) \
+    (0 | (((tag) << 2) & 0xFF) | ((type) << 8) | (((uint32_t)(data_offset) & 0xFF) << 16) | \
+     (((uint32_t)(size_offset) & 0x0F) << 24) | (((uint32_t)(data_size) & 0x0F) << 28)), /* 1 word; array_size is not stored in this format */
+
+#define PB_FIELDINFO_2(tag, type, data_offset, data_size, size_offset, array_size) \
+    (1 | (((tag) << 2) & 0xFF) | ((type) << 8) | (((uint32_t)(array_size) & 0xFFF) << 16) | (((uint32_t)(size_offset) & 0x0F) << 28)), \
+    (((uint32_t)(data_offset) & 0xFFFF) | (((uint32_t)(data_size) & 0xFFF) << 16) | (((uint32_t)(tag) & 0x3c0) << 22)), /* 2 words; tag bits 6..9 land in the top 4 bits of the second word */
+
+#define PB_FIELDINFO_4(tag, type, data_offset, data_size, size_offset, array_size) \
+    (2 | (((tag) << 2) & 0xFF) | ((type) << 8) | (((uint32_t)(array_size) & 0xFFFF) << 16)), \
+    ((uint32_t)(int_least8_t)(size_offset) | (((uint32_t)(tag) << 2) & 0xFFFFFF00)), \
+    (data_offset), (data_size), /* 4 words; size_offset passes through int_least8_t and is not masked, so it must be non-negative to leave the tag bits intact */
+
+#define PB_FIELDINFO_8(tag, type, data_offset, data_size, size_offset, array_size) \
+    (3 | (((tag) << 2) & 0xFF) | ((type) << 8)), \
+    ((uint32_t)(int_least8_t)(size_offset) | (((uint32_t)(tag) << 2) & 0xFFFFFF00)), \
+    (data_offset), (data_size), (array_size), 0, 0, 0, /* 8 words; the last three are reserved and written as 0 */
+
+/* These assertions verify that the field information fits in the allocated space.
+ * The generator tries to automatically determine the correct width that can fit all
+ * data associated with a message. These asserts will fail only if there has been a
+ * problem in the automatic logic - this may be worth reporting as a bug. As a workaround,
+ * you can increase the descriptor width by defining PB_FIELDINFO_WIDTH or by setting
+ * descriptorsize option in .options file.
+ * PB_FITS(value,bits) is true when value, converted to uint32_t, is below 2^bits, so negative values never fit. */
+#define PB_FITS(value,bits) ((uint32_t)(value) < ((uint32_t)1<<(bits)))
+#define PB_FIELDINFO_ASSERT_1(tag, type, data_offset, data_size, size_offset, array_size) \
+    PB_STATIC_ASSERT(PB_FITS(tag,6) && PB_FITS(data_offset,8) && PB_FITS(size_offset,4) && PB_FITS(data_size,4) && PB_FITS(array_size,1), FIELDINFO_DOES_NOT_FIT_width1_field ## tag)
+
+#define PB_FIELDINFO_ASSERT_2(tag, type, data_offset, data_size, size_offset, array_size) \
+    PB_STATIC_ASSERT(PB_FITS(tag,10) && PB_FITS(data_offset,16) && PB_FITS(size_offset,4) && PB_FITS(data_size,12) && PB_FITS(array_size,12), FIELDINFO_DOES_NOT_FIT_width2_field ## tag)
+
+#ifndef PB_FIELD_32BIT
+/* Maximum field sizes are still 16-bit if pb_size_t is 16-bit */
+#define PB_FIELDINFO_ASSERT_4(tag, type, data_offset, data_size, size_offset, array_size) \
+    PB_STATIC_ASSERT(PB_FITS(tag,16) && PB_FITS(data_offset,16) && PB_FITS((int_least8_t)(size_offset),8) && PB_FITS(data_size,16) && PB_FITS(array_size,16), FIELDINFO_DOES_NOT_FIT_width4_field ## tag)
+
+#define PB_FIELDINFO_ASSERT_8(tag, type, data_offset, data_size, size_offset, array_size) \
+    PB_STATIC_ASSERT(PB_FITS(tag,16) && PB_FITS(data_offset,16) && PB_FITS((int_least8_t)(size_offset),8) && PB_FITS(data_size,16) && PB_FITS(array_size,16), FIELDINFO_DOES_NOT_FIT_width8_field ## tag)
+#else
+/* Up to 32-bit fields supported.
+ * Note that the checks are against 31 bits to avoid compiler warnings about shift wider than type in the test.
+ * I expect that there is no reasonable use for >2GB messages with nanopb anyway.
+ * size_offset is checked after narrowing to int_least8_t, because PB_FIELDINFO_4/8 store it through that type. */
+#define PB_FIELDINFO_ASSERT_4(tag, type, data_offset, data_size, size_offset, array_size) \
+    PB_STATIC_ASSERT(PB_FITS(tag,30) && PB_FITS(data_offset,31) && PB_FITS((int_least8_t)(size_offset),8) && PB_FITS(data_size,31) && PB_FITS(array_size,16), FIELDINFO_DOES_NOT_FIT_width4_field ## tag)
+
+#define PB_FIELDINFO_ASSERT_8(tag, type, data_offset, data_size, size_offset, array_size) \
+    PB_STATIC_ASSERT(PB_FITS(tag,30) && PB_FITS(data_offset,31) && PB_FITS((int_least8_t)(size_offset),8) && PB_FITS(data_size,31) && PB_FITS(array_size,31), FIELDINFO_DOES_NOT_FIT_width8_field ## tag)
+#endif
+
+
+/* Automatic picking of FIELDINFO width, used when PB_BIND() is called with "AUTO":
+ * width 2 for callback, REPEATED and FIXARRAY fields and for the BYTES, STRING,
+ * MESSAGE, MSG_W_CB and FIXED_LENGTH_BYTES ltypes; width 1 for everything else.
+ * The choice depends only on the kind of field, not on actual offsets or sizes.
+ * The generator will give explicit size argument when it knows that a message
+ * structure grows beyond 1-word format limits. */
+#define PB_FIELDINFO_WIDTH_AUTO(atype, htype, ltype) PB_FI_WIDTH ## atype(htype, ltype)
+#define PB_FI_WIDTH_PB_ATYPE_STATIC(htype, ltype) PB_FI_WIDTH ## htype(ltype)
+#define PB_FI_WIDTH_PB_ATYPE_POINTER(htype, ltype) PB_FI_WIDTH ## htype(ltype)
+#define PB_FI_WIDTH_PB_ATYPE_CALLBACK(htype, ltype) 2
+#define PB_FI_WIDTH_PB_HTYPE_REQUIRED(ltype) PB_FI_WIDTH ## ltype
+#define PB_FI_WIDTH_PB_HTYPE_SINGULAR(ltype) PB_FI_WIDTH ## ltype
+#define PB_FI_WIDTH_PB_HTYPE_OPTIONAL(ltype) PB_FI_WIDTH ## ltype
+#define PB_FI_WIDTH_PB_HTYPE_ONEOF(ltype) PB_FI_WIDTH ## ltype
+#define PB_FI_WIDTH_PB_HTYPE_REPEATED(ltype) 2
+#define PB_FI_WIDTH_PB_HTYPE_FIXARRAY(ltype) 2
+#define PB_FI_WIDTH_PB_LTYPE_BOOL      1
+#define PB_FI_WIDTH_PB_LTYPE_BYTES     2
+#define PB_FI_WIDTH_PB_LTYPE_DOUBLE    1
+#define PB_FI_WIDTH_PB_LTYPE_ENUM      1
+#define PB_FI_WIDTH_PB_LTYPE_UENUM     1
+#define PB_FI_WIDTH_PB_LTYPE_FIXED32   1
+#define PB_FI_WIDTH_PB_LTYPE_FIXED64   1
+#define PB_FI_WIDTH_PB_LTYPE_FLOAT     1
+#define PB_FI_WIDTH_PB_LTYPE_INT32     1
+#define PB_FI_WIDTH_PB_LTYPE_INT64     1
+#define PB_FI_WIDTH_PB_LTYPE_MESSAGE   2
+#define PB_FI_WIDTH_PB_LTYPE_MSG_W_CB  2
+#define PB_FI_WIDTH_PB_LTYPE_SFIXED32  1
+#define PB_FI_WIDTH_PB_LTYPE_SFIXED64  1
+#define PB_FI_WIDTH_PB_LTYPE_SINT32    1
+#define PB_FI_WIDTH_PB_LTYPE_SINT64    1
+#define PB_FI_WIDTH_PB_LTYPE_STRING    2
+#define PB_FI_WIDTH_PB_LTYPE_UINT32    1
+#define PB_FI_WIDTH_PB_LTYPE_UINT64    1
+#define PB_FI_WIDTH_PB_LTYPE_EXTENSION 1
+#define PB_FI_WIDTH_PB_LTYPE_FIXED_LENGTH_BYTES 2
+
+/* The mapping from protobuf types to LTYPEs is done using these macros. Several protobuf types share one LTYPE (e.g. DOUBLE, FIXED64 and SFIXED64 all map to PB_LTYPE_FIXED64). */
+#define PB_LTYPE_MAP_BOOL               PB_LTYPE_BOOL
+#define PB_LTYPE_MAP_BYTES              PB_LTYPE_BYTES
+#define PB_LTYPE_MAP_DOUBLE             PB_LTYPE_FIXED64
+#define PB_LTYPE_MAP_ENUM               PB_LTYPE_VARINT
+#define PB_LTYPE_MAP_UENUM              PB_LTYPE_UVARINT
+#define PB_LTYPE_MAP_FIXED32            PB_LTYPE_FIXED32
+#define PB_LTYPE_MAP_FIXED64            PB_LTYPE_FIXED64
+#define PB_LTYPE_MAP_FLOAT              PB_LTYPE_FIXED32
+#define PB_LTYPE_MAP_INT32              PB_LTYPE_VARINT
+#define PB_LTYPE_MAP_INT64              PB_LTYPE_VARINT
+#define PB_LTYPE_MAP_MESSAGE            PB_LTYPE_SUBMESSAGE
+#define PB_LTYPE_MAP_MSG_W_CB           PB_LTYPE_SUBMSG_W_CB
+#define PB_LTYPE_MAP_SFIXED32           PB_LTYPE_FIXED32
+#define PB_LTYPE_MAP_SFIXED64           PB_LTYPE_FIXED64
+#define PB_LTYPE_MAP_SINT32             PB_LTYPE_SVARINT
+#define PB_LTYPE_MAP_SINT64             PB_LTYPE_SVARINT
+#define PB_LTYPE_MAP_STRING             PB_LTYPE_STRING
+#define PB_LTYPE_MAP_UINT32             PB_LTYPE_UVARINT
+#define PB_LTYPE_MAP_UINT64             PB_LTYPE_UVARINT
+#define PB_LTYPE_MAP_EXTENSION          PB_LTYPE_EXTENSION
+#define PB_LTYPE_MAP_FIXED_LENGTH_BYTES PB_LTYPE_FIXED_LENGTH_BYTES
+
+/* These macros are used for giving out error messages.
+ * They are mostly a debugging aid; the main error information
+ * is the true/false return value from functions.
+ * Defining PB_NO_ERRMSG disables the messages to save code space.
+ * 'stream' may be any pointer expression; with messages enabled it is
+ * evaluated more than once, so it must not have side effects.
+ * PB_SET_ERROR() sets the error message if none has been set yet.
+ *                msg must be a constant string literal.
+ * PB_GET_ERROR() always returns a pointer to a string.
+ * PB_RETURN_ERROR() sets the error and returns false from current
+ *                   function.
+ */
+#ifdef PB_NO_ERRMSG
+#define PB_SET_ERROR(stream, msg) PB_UNUSED(stream)
+#define PB_GET_ERROR(stream) "(errmsg disabled)"
+#else
+#define PB_SET_ERROR(stream, msg) ((stream)->errmsg = (stream)->errmsg ? (stream)->errmsg : (msg))
+#define PB_GET_ERROR(stream) ((stream)->errmsg ? (stream)->errmsg : "(none)")
+#endif
+
+#define PB_RETURN_ERROR(stream, msg) return PB_SET_ERROR(stream, msg), false
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#ifdef __cplusplus
+#if __cplusplus >= 201103L
+#define PB_CONSTEXPR constexpr
+#else  // __cplusplus >= 201103L
+#define PB_CONSTEXPR /* expands to nothing when the compiler reports a standard older than C++11 */
+#endif  // __cplusplus >= 201103L
+
+#if __cplusplus >= 201703L
+#define PB_INLINE_CONSTEXPR inline constexpr
+#else  // __cplusplus >= 201703L
+#define PB_INLINE_CONSTEXPR PB_CONSTEXPR /* before C++17 the 'inline' part is dropped and only PB_CONSTEXPR remains */
+#endif  // __cplusplus >= 201703L
+
+namespace nanopb {
+// Each type will be partially specialized by the generator.
+template <typename GenMessageT> struct MessageDescriptor; // declared only; no primary definition is given here
+}  // namespace nanopb
+#endif  /* __cplusplus */
+
+#endif
+

diff --git a/security/container/protos/nanopb/pb_common.c b/security/container/protos/nanopb/pb_common.c
new file mode 100644
index 0000000..6aee76b
--- /dev/null
+++ b/security/container/protos/nanopb/pb_common.c

@@ -0,0 +1,388 @@
+/* pb_common.c: Common support functions for pb_encode.c and pb_decode.c.
+ *
+ * 2014 Petteri Aimonen <jpa@kapsi.fi>
+ */
+
+#include "pb_common.h"
+
+/* Decode the field descriptor at iter->field_info_index and fill in the
+ * per-field members of the iterator: type, tag, array_size, data_size,
+ * pField, pSize, pData and submsg_desc.
+ *
+ * The two lowest bits of the first descriptor word select the layout
+ * (0 = 1-word, 1 = 2-word, 2 = 4-word, 3 = 8-word format); bits 8..15
+ * always hold the field type.
+ *
+ * Returns false, leaving the iterator untouched, when iter->index is at or
+ * beyond descriptor->field_count. Returns true otherwise.
+ */
+static bool load_descriptor_values(pb_field_iter_t *iter)
+{
+    uint32_t word0;
+    uint32_t data_offset;
+    int_least8_t size_offset;
+
+    if (iter->index >= iter->descriptor->field_count)
+        return false;
+
+    word0 = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index]);
+    iter->type = (pb_type_t)((word0 >> 8) & 0xFF);
+
+    switch(word0 & 3)
+    {
+        case 0: {
+            /* 1-word format */
+            iter->array_size = 1;
+            iter->tag = (pb_size_t)((word0 >> 2) & 0x3F);
+            size_offset = (int_least8_t)((word0 >> 24) & 0x0F);
+            data_offset = (word0 >> 16) & 0xFF;
+            iter->data_size = (pb_size_t)((word0 >> 28) & 0x0F);
+            break;
+        }
+
+        case 1: {
+            /* 2-word format */
+            uint32_t word1 = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index + 1]);
+
+            iter->array_size = (pb_size_t)((word0 >> 16) & 0x0FFF);
+            /* Low 6 tag bits come from word0, the remaining bits from the top of word1. */
+            iter->tag = (pb_size_t)(((word0 >> 2) & 0x3F) | ((word1 >> 28) << 6));
+            size_offset = (int_least8_t)((word0 >> 28) & 0x0F);
+            data_offset = word1 & 0xFFFF;
+            iter->data_size = (pb_size_t)((word1 >> 16) & 0x0FFF);
+            break;
+        }
+
+        case 2: {
+            /* 4-word format */
+            uint32_t word1 = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index + 1]);
+            uint32_t word2 = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index + 2]);
+            uint32_t word3 = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index + 3]);
+
+            iter->array_size = (pb_size_t)(word0 >> 16);
+            iter->tag = (pb_size_t)(((word0 >> 2) & 0x3F) | ((word1 >> 8) << 6));
+            size_offset = (int_least8_t)(word1 & 0xFF);
+            data_offset = word2;
+            iter->data_size = (pb_size_t)word3;
+            break;
+        }
+
+        default: {
+            /* 8-word format */
+            /* Only words 0..4 are read here; array_size gets a full word of its own. */
+            uint32_t word1 = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index + 1]);
+            uint32_t word2 = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index + 2]);
+            uint32_t word3 = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index + 3]);
+            uint32_t word4 = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index + 4]);
+
+            iter->array_size = (pb_size_t)word4;
+            iter->tag = (pb_size_t)(((word0 >> 2) & 0x3F) | ((word1 >> 8) << 6));
+            size_offset = (int_least8_t)(word1 & 0xFF);
+            data_offset = word2;
+            iter->data_size = (pb_size_t)word3;
+            break;
+        }
+    }
+
+    if (!iter->message)
+    {
+        /* Avoid doing arithmetic on null pointers, it is undefined */
+        /* Note: pData is not assigned on this path and keeps its previous value. */
+        iter->pField = NULL;
+        iter->pSize = NULL;
+    }
+    else
+    {
+        iter->pField = (char*)iter->message + data_offset;
+
+        if (size_offset)
+        {
+            /* The size/count member is located size_offset bytes before the field. */
+            iter->pSize = (char*)iter->pField - size_offset;
+        }
+        else if (PB_HTYPE(iter->type) == PB_HTYPE_REPEATED &&
+                 (PB_ATYPE(iter->type) == PB_ATYPE_STATIC ||
+                  PB_ATYPE(iter->type) == PB_ATYPE_POINTER))
+        {
+            /* Fixed count array */
+            iter->pSize = &iter->array_size;
+        }
+        else
+        {
+            iter->pSize = NULL;
+        }
+
+        /* For pointer fields pData is the pointer stored in the message,
+         * for everything else it is the address of the field itself. */
+        if (PB_ATYPE(iter->type) == PB_ATYPE_POINTER && iter->pField != NULL)
+        {
+            iter->pData = *(void**)iter->pField;
+        }
+        else
+        {
+            iter->pData = iter->pField;
+        }
+    }
+
+    if (PB_LTYPE_IS_SUBMSG(iter->type))
+    {
+        iter->submsg_desc = iter->descriptor->submsg_info[iter->submessage_index];
+    }
+    else
+    {
+        iter->submsg_desc = NULL;
+    }
+
+    return true;
+}
+
+/* Move the iterator's index counters to the next field without loading the
+ * field's descriptor values. After the last field the counters wrap back to
+ * the first field (all indexes reset to 0).
+ */
+static void advance_iterator(pb_field_iter_t *iter)
+{
+    iter->index++;
+
+    if (iter->index >= iter->descriptor->field_count)
+    {
+        /* Restart */
+        iter->index = 0;
+        iter->field_info_index = 0;
+        iter->submessage_index = 0;
+        iter->required_field_index = 0;
+    }
+    else
+    {
+        /* Increment indexes based on previous field type.
+         * All field info formats have the following fields:
+         * - lowest 2 bits tell the amount of words in the descriptor (2^n words)
+         * - bits 2..7 give the lowest bits of tag number.
+         * - bits 8..15 give the field type.
+         */
+        uint32_t prev_descriptor = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index]);
+        pb_type_t prev_type = (prev_descriptor >> 8) & 0xFF;
+        pb_size_t descriptor_len = (pb_size_t)(1 << (prev_descriptor & 3));
+
+        /* Add to fields.
+         * The cast to pb_size_t is needed to avoid -Wconversion warning.
+         * Because the data is constants from the generator, there is no danger of overflow.
+         */
+        iter->field_info_index = (pb_size_t)(iter->field_info_index + descriptor_len);
+        iter->required_field_index = (pb_size_t)(iter->required_field_index + (PB_HTYPE(prev_type) == PB_HTYPE_REQUIRED));
+        iter->submessage_index = (pb_size_t)(iter->submessage_index + PB_LTYPE_IS_SUBMSG(prev_type));
+    }
+}
+
+/* Reset *iter to all-zero, bind it to the given descriptor and message and
+ * load the values of the first field. The result is whatever
+ * load_descriptor_values() reports for the first field. */
+bool pb_field_iter_begin(pb_field_iter_t *iter, const pb_msgdesc_t *desc, void *message)
+{
+    bool first_field_loaded;
+
+    memset(iter, 0, sizeof *iter);
+    iter->message = message;
+    iter->descriptor = desc;
+
+    first_field_loaded = load_descriptor_values(iter);
+    return first_field_loaded;
+}
+
+/* Start an iterator over the single field described by an extension.
+ * The message descriptor is taken from extension->type->arg, and the
+ * iterator's pSize is pointed at extension->found. */
+bool pb_field_iter_begin_extension(pb_field_iter_t *iter, pb_extension_t *extension)
+{
+    const pb_msgdesc_t *ext_desc = (const pb_msgdesc_t*)extension->type->arg;
+    uint32_t first_word = PB_PROGMEM_READU32(ext_desc->field_info[0]);
+    void *target;
+    bool ok;
+
+    if (PB_ATYPE(first_word >> 8) == PB_ATYPE_POINTER)
+    {
+        /* Pointer extensions keep the pointer itself in the extension
+         * structure, so the "message" is the address of that pointer.
+         * This saves one level of indirection. */
+        target = &extension->dest;
+    }
+    else
+    {
+        target = extension->dest;
+    }
+
+    ok = pb_field_iter_begin(iter, ext_desc, target);
+    iter->pSize = &extension->found;
+    return ok;
+}
+
+/* Step to the following field and load its values. The index wraps to 0
+ * after the last field, in which case false is returned. */
+bool pb_field_iter_next(pb_field_iter_t *iter)
+{
+    advance_iterator(iter);
+    (void)load_descriptor_values(iter);
+
+    if (iter->index == 0)
+    {
+        /* Wrapped around to the first field */
+        return false;
+    }
+
+    return true;
+}
+
+/* Position the iterator on the field whose tag equals 'tag'.
+ *
+ * Returns true when the iterator already points at, or has been moved to, a
+ * matching field. Fields of type PB_LTYPE_EXTENSION are never accepted as a
+ * match by the search loop. Returns false when the tag is larger than
+ * descriptor->largest_tag or when a full pass over the fields finds no
+ * match; in the latter case the values of the starting field are reloaded.
+ */
+bool pb_field_iter_find(pb_field_iter_t *iter, uint32_t tag)
+{
+    if (iter->tag == tag)
+    {
+        return true; /* Nothing to do, correct field already. */
+    }
+    else if (tag > iter->descriptor->largest_tag)
+    {
+        return false;
+    }
+    else
+    {
+        pb_size_t start = iter->index;
+        uint32_t fieldinfo;
+
+        if (tag < iter->tag)
+        {
+            /* Fields are in tag number order, so we know that tag is between
+             * 0 and our start position. Setting index to end forces
+             * advance_iterator() call below to restart from beginning. */
+            iter->index = iter->descriptor->field_count;
+        }
+
+        do
+        {
+            /* Advance iterator but don't load values yet */
+            advance_iterator(iter);
+
+            /* Do fast check for tag number match */
+            fieldinfo = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index]);
+
+            /* Only the low 6 tag bits live in the first descriptor word, so
+             * this comparison can yield false positives but never misses. */
+            if (((fieldinfo >> 2) & 0x3F) == (tag & 0x3F))
+            {
+                /* Good candidate, check further */
+                (void)load_descriptor_values(iter);
+
+                if (iter->tag == tag &&
+                    PB_LTYPE(iter->type) != PB_LTYPE_EXTENSION)
+                {
+                    /* Found it */
+                    return true;
+                }
+            }
+        } while (iter->index != start);
+
+        /* Searched all the way back to start, and found nothing. */
+        (void)load_descriptor_values(iter);
+        return false;
+    }
+}
+
+/* Position the iterator on a field of type PB_LTYPE_EXTENSION.
+ *
+ * Returns true immediately if the current field already has that type.
+ * Otherwise the fields are scanned (wrapping around) using only the type
+ * bits of the first descriptor word; on a hit the field values are loaded
+ * and the result of load_descriptor_values() is returned. After a full
+ * pass without a hit the starting field is reloaded and false is returned.
+ */
+bool pb_field_iter_find_extension(pb_field_iter_t *iter)
+{
+    if (PB_LTYPE(iter->type) == PB_LTYPE_EXTENSION)
+    {
+        return true;
+    }
+    else
+    {
+        pb_size_t start = iter->index;
+        uint32_t fieldinfo;
+
+        do
+        {
+            /* Advance iterator but don't load values yet */
+            advance_iterator(iter);
+
+            /* Do fast check for field type */
+            fieldinfo = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index]);
+
+            if (PB_LTYPE((fieldinfo >> 8) & 0xFF) == PB_LTYPE_EXTENSION)
+            {
+                return load_descriptor_values(iter);
+            }
+        } while (iter->index != start);
+
+        /* Searched all the way back to start, and found nothing. */
+        (void)load_descriptor_values(iter);
+        return false;
+    }
+}
+
+/* Strip the const qualifier from a pointer so that the shared field
+ * iterator code can serve both the encoder and the decoder. Going through
+ * a union instead of a plain cast keeps compilers from warning about the
+ * discarded qualifier. */
+static void *pb_const_cast(const void *p)
+{
+    union {
+        const void *as_const;
+        void *as_mutable;
+    } alias;
+
+    alias.as_const = p;
+    return alias.as_mutable;
+}
+
+/* Const-message variant of pb_field_iter_begin(): the const qualifier is
+ * removed with pb_const_cast() and the call is forwarded unchanged. */
+bool pb_field_iter_begin_const(pb_field_iter_t *iter, const pb_msgdesc_t *desc, const void *message)
+{
+    void *mutable_message = pb_const_cast(message);
+
+    return pb_field_iter_begin(iter, desc, mutable_message);
+}
+
+/* Const-extension variant of pb_field_iter_begin_extension(): the const
+ * qualifier is removed with pb_const_cast() and the call is forwarded. */
+bool pb_field_iter_begin_extension_const(pb_field_iter_t *iter, const pb_extension_t *extension)
+{
+    pb_extension_t *mutable_ext = (pb_extension_t*)pb_const_cast(extension);
+
+    return pb_field_iter_begin_extension(iter, mutable_ext);
+}
+
+/* Default callback dispatcher for fields stored as pb_callback_t.
+ *
+ * When the field's data_size matches sizeof(pb_callback_t) and pData is
+ * non-NULL, the decode function is invoked if an input stream was given
+ * and a decode function is set; otherwise the encode function is invoked
+ * if an output stream was given and an encode function is set. In every
+ * other situation nothing is called and true is returned.
+ */
+bool pb_default_field_callback(pb_istream_t *istream, pb_ostream_t *ostream, const pb_field_t *field)
+{
+    pb_callback_t *cb;
+
+    if (field->data_size != sizeof(pb_callback_t))
+        return true; /* Not a pb_callback_t field, nothing to do */
+
+    cb = (pb_callback_t*)field->pData;
+    if (cb == NULL)
+        return true; /* No callback structure available */
+
+    if (istream != NULL && cb->funcs.decode != NULL)
+        return cb->funcs.decode(istream, field, &cb->arg);
+
+    if (ostream != NULL && cb->funcs.encode != NULL)
+        return cb->funcs.encode(ostream, field, &cb->arg);
+
+    return true; /* Success, but didn't do anything */
+}
+
+#ifdef PB_VALIDATE_UTF8
+
+/* This function checks whether a string is valid UTF-8 text.
+ *
+ * Algorithm is adapted from https://www.cl.cam.ac.uk/~mgk25/ucs/utf8_check.c
+ * Original copyright: Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> 2005-03-30
+ * Licensed under "Short code license", which allows use under MIT license or
+ * any compatible with it.
+ */
+
+/* Returns true when the NUL-terminated string 'str' consists only of
+ * well-formed UTF-8 sequences as checked below, false at the first
+ * sequence that is rejected.
+ *
+ * The continuation bytes are tested in order with short-circuiting '||',
+ * and a NUL byte never passes the (x & 0xc0) == 0x80 test, so a sequence
+ * truncated by the terminator is rejected before any byte past the
+ * terminator is read.
+ */
+bool pb_validate_utf8(const char *str)
+{
+    const pb_byte_t *s = (const pb_byte_t*)str;
+    while (*s)
+    {
+        if (*s < 0x80)
+        {
+            /* 0xxxxxxx */
+            s++;
+        }
+        else if ((s[0] & 0xe0) == 0xc0)
+        {
+            /* 110XXXXx 10xxxxxx */
+            if ((s[1] & 0xc0) != 0x80 ||
+                (s[0] & 0xfe) == 0xc0)                        /* overlong? */
+                return false;
+            else
+                s += 2;
+        }
+        else if ((s[0] & 0xf0) == 0xe0)
+        {
+            /* 1110XXXX 10Xxxxxx 10xxxxxx */
+            if ((s[1] & 0xc0) != 0x80 ||
+                (s[2] & 0xc0) != 0x80 ||
+                (s[0] == 0xe0 && (s[1] & 0xe0) == 0x80) ||    /* overlong? */
+                (s[0] == 0xed && (s[1] & 0xe0) == 0xa0) ||    /* surrogate? */
+                (s[0] == 0xef && s[1] == 0xbf &&
+                (s[2] & 0xfe) == 0xbe))                 /* U+FFFE or U+FFFF? */
+                return false;
+            else
+                s += 3;
+        }
+        else if ((s[0] & 0xf8) == 0xf0)
+        {
+            /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
+            if ((s[1] & 0xc0) != 0x80 ||
+                (s[2] & 0xc0) != 0x80 ||
+                (s[3] & 0xc0) != 0x80 ||
+                (s[0] == 0xf0 && (s[1] & 0xf0) == 0x80) ||    /* overlong? */
+                (s[0] == 0xf4 && s[1] > 0x8f) || s[0] > 0xf4) /* > U+10FFFF? */
+                return false;
+            else
+                s += 4;
+        }
+        else
+        {
+            /* Stray continuation byte (10xxxxxx) or a lead byte of 0xf8 and above */
+            return false;
+        }
+    }
+
+    return true;
+}
+
+#endif
+

diff --git a/security/container/protos/nanopb/pb_common.h b/security/container/protos/nanopb/pb_common.h
new file mode 100644
index 0000000..58aa90f
--- /dev/null
+++ b/security/container/protos/nanopb/pb_common.h

@@ -0,0 +1,49 @@
+/* pb_common.h: Common support functions for pb_encode.c and pb_decode.c.
+ * These functions are rarely needed by applications directly.
+ */
+
+#ifndef PB_COMMON_H_INCLUDED
+#define PB_COMMON_H_INCLUDED
+
+#include "pb.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Initialize the field iterator structure to beginning.
+ * The iterator is cleared, bound to 'desc' and 'message', and the values of
+ * the first field are loaded.
+ * Returns false if the message type is empty. */
+bool pb_field_iter_begin(pb_field_iter_t *iter, const pb_msgdesc_t *desc, void *message);
+
+/* Get a field iterator for extension field.
+ * The message descriptor is read from extension->type->arg and the
+ * iterator's pSize member is set to point at extension->found. */
+bool pb_field_iter_begin_extension(pb_field_iter_t *iter, pb_extension_t *extension);
+
+/* Same as pb_field_iter_begin(), but for const message pointer.
+ * Note that the pointers in pb_field_iter_t will be non-const but shouldn't
+ * be written to when using these functions. */
+bool pb_field_iter_begin_const(pb_field_iter_t *iter, const pb_msgdesc_t *desc, const void *message);
+bool pb_field_iter_begin_extension_const(pb_field_iter_t *iter, const pb_extension_t *extension);
+
+/* Advance the iterator to the next field.
+ * Returns false when the iterator wraps back to the first field. */
+bool pb_field_iter_next(pb_field_iter_t *iter);
+
+/* Advance the iterator until it points at a field with the given tag.
+ * The search wraps around the end of the field list. Extension range
+ * fields are not reported as a match by the search.
+ * Returns false if no such field exists. */
+bool pb_field_iter_find(pb_field_iter_t *iter, uint32_t tag);
+
+/* Find a field with type PB_LTYPE_EXTENSION, or return false if not found.
+ * There can be only one extension range field per message. */
+bool pb_field_iter_find_extension(pb_field_iter_t *iter);
+
+#ifdef PB_VALIDATE_UTF8
+/* Validate UTF-8 text string.
+ * 's' must be NUL-terminated. Returns true if every sequence is accepted. */
+bool pb_validate_utf8(const char *s);
+#endif
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif
+

diff --git a/security/container/protos/nanopb/pb_decode.c b/security/container/protos/nanopb/pb_decode.c
new file mode 100644
index 0000000..b194825
--- /dev/null
+++ b/security/container/protos/nanopb/pb_decode.c

@@ -0,0 +1,1709 @@
+/* pb_decode.c -- decode a protobuf using minimal resources
+ *
+ * 2011 Petteri Aimonen <jpa@kapsi.fi>
+ */
+
+/* Use the GCC warn_unused_result attribute to check that all return values
+ * are propagated correctly. On other compilers and gcc before 3.4.0 just
+ * ignore the annotation.
+ */
+#if !defined(__GNUC__) || ( __GNUC__ < 3) || (__GNUC__ == 3 && __GNUC_MINOR__ < 4)
+    #define checkreturn
+#else
+    #define checkreturn __attribute__((warn_unused_result))
+#endif
+
+#include "pb.h"
+#include "pb_decode.h"
+#include "pb_common.h"
+
+/**************************************
+ * Declarations internal to this file *
+ **************************************/
+
+static bool checkreturn buf_read(pb_istream_t *stream, pb_byte_t *buf, size_t count);
+static bool checkreturn pb_decode_varint32_eof(pb_istream_t *stream, uint32_t *dest, bool *eof);
+static bool checkreturn read_raw_value(pb_istream_t *stream, pb_wire_type_t wire_type, pb_byte_t *buf, size_t *size);
+static bool checkreturn decode_basic_field(pb_istream_t *stream, pb_wire_type_t wire_type, pb_field_iter_t *field);
+static bool checkreturn decode_static_field(pb_istream_t *stream, pb_wire_type_t wire_type, pb_field_iter_t *field);
+static bool checkreturn decode_pointer_field(pb_istream_t *stream, pb_wire_type_t wire_type, pb_field_iter_t *field);
+static bool checkreturn decode_callback_field(pb_istream_t *stream, pb_wire_type_t wire_type, pb_field_iter_t *field);
+static bool checkreturn decode_field(pb_istream_t *stream, pb_wire_type_t wire_type, pb_field_iter_t *field);
+static bool checkreturn default_extension_decoder(pb_istream_t *stream, pb_extension_t *extension, uint32_t tag, pb_wire_type_t wire_type);
+static bool checkreturn decode_extension(pb_istream_t *stream, uint32_t tag, pb_wire_type_t wire_type, pb_extension_t *extension);
+static bool pb_field_set_to_default(pb_field_iter_t *field);
+static bool pb_message_set_to_defaults(pb_field_iter_t *iter);
+static bool checkreturn pb_dec_bool(pb_istream_t *stream, const pb_field_iter_t *field);
+static bool checkreturn pb_dec_varint(pb_istream_t *stream, const pb_field_iter_t *field);
+static bool checkreturn pb_dec_bytes(pb_istream_t *stream, const pb_field_iter_t *field);
+static bool checkreturn pb_dec_string(pb_istream_t *stream, const pb_field_iter_t *field);
+static bool checkreturn pb_dec_submessage(pb_istream_t *stream, const pb_field_iter_t *field);
+static bool checkreturn pb_dec_fixed_length_bytes(pb_istream_t *stream, const pb_field_iter_t *field);
+static bool checkreturn pb_skip_varint(pb_istream_t *stream);
+static bool checkreturn pb_skip_string(pb_istream_t *stream);
+
+#ifdef PB_ENABLE_MALLOC
+static bool checkreturn allocate_field(pb_istream_t *stream, void *pData, size_t data_size, size_t array_size);
+static void initialize_pointer_field(void *pItem, pb_field_iter_t *field);
+static bool checkreturn pb_release_union_field(pb_istream_t *stream, pb_field_iter_t *field);
+static void pb_release_single_field(pb_field_iter_t *field);
+#endif
+
+/* Widest integer types used inside this file. With PB_WITHOUT_64BIT
+ * defined they are mapped onto the 32-bit types instead. */
+#ifdef PB_WITHOUT_64BIT
+#define pb_int64_t int32_t
+#define pb_uint64_t uint32_t
+#else
+#define pb_int64_t int64_t
+#define pb_uint64_t uint64_t
+#endif
+
+/* Internal pseudo wire type passed to decode_basic_field() for the
+ * individual elements of a packed array. */
+#define PB_WT_PACKED ((pb_wire_type_t)0xFF)
+
+/* Bit set with room for PB_MAX_REQUIRED_FIELDS bits, rounded up to whole
+ * 32-bit words. NOTE(review): presumably one bit per required field that
+ * has been decoded; the users are outside this part of the file. */
+typedef struct {
+    uint32_t bitfield[(PB_MAX_REQUIRED_FIELDS + 31) / 32];
+} pb_fields_seen_t;
+
+/*******************************
+ * pb_istream_t implementation *
+ *******************************/
+
+/* Stream callback for memory buffers: stream->state is the read position.
+ * The position is advanced by 'count' bytes and, when 'buf' is non-NULL,
+ * those bytes are copied to it. No bounds check is made here. Always
+ * returns true. */
+static bool checkreturn buf_read(pb_istream_t *stream, pb_byte_t *buf, size_t count)
+{
+    const pb_byte_t *src = (const pb_byte_t*)stream->state;
+    const pb_byte_t *src_end = src + count;
+
+    stream->state = (pb_byte_t*)stream->state + count;
+
+    if (buf == NULL)
+        return true; /* Caller only wanted to skip the bytes */
+
+    while (src != src_end)
+        *buf++ = *src++;
+
+    return true;
+}
+
+/* Read 'count' bytes from the stream into 'buf'.
+ *
+ * A NULL 'buf' means the bytes are to be discarded. For streams that do
+ * not use buf_read as their callback, discarding is done by reading into a
+ * 16-byte scratch buffer in pieces. Fails with "end-of-stream" when fewer
+ * than 'count' bytes are left and with "io error" when the stream callback
+ * reports failure. On success bytes_left is reduced by 'count'.
+ */
+bool checkreturn pb_read(pb_istream_t *stream, pb_byte_t *buf, size_t count)
+{
+    if (count == 0)
+        return true;
+
+#ifndef PB_BUFFER_ONLY
+    if (buf == NULL && stream->callback != buf_read)
+    {
+        /* Skip input bytes */
+        pb_byte_t scratch[16];
+
+        for (; count > 16; count -= 16)
+        {
+            if (!pb_read(stream, scratch, 16))
+                return false;
+        }
+
+        return pb_read(stream, scratch, count);
+    }
+#endif
+
+    if (stream->bytes_left < count)
+        PB_RETURN_ERROR(stream, "end-of-stream");
+
+#ifndef PB_BUFFER_ONLY
+    if (!stream->callback(stream, buf, count))
+        PB_RETURN_ERROR(stream, "io error");
+#else
+    if (!buf_read(stream, buf, count))
+        return false;
+#endif
+
+    stream->bytes_left -= count;
+    return true;
+}
+
+/* Read a single byte from input stream. buf may not be NULL.
+ * This is an optimization for the varint decoding.
+ *
+ * Fails with "end-of-stream" when no bytes are left and, for callback
+ * streams, with "io error" when the callback reports failure. On success
+ * bytes_left is decremented by one. */
+static bool checkreturn pb_readbyte(pb_istream_t *stream, pb_byte_t *buf)
+{
+    if (stream->bytes_left == 0)
+        PB_RETURN_ERROR(stream, "end-of-stream");
+
+#ifndef PB_BUFFER_ONLY
+    if (!stream->callback(stream, buf, 1))
+        PB_RETURN_ERROR(stream, "io error");
+#else
+    /* Buffer-only build: read straight from the memory position in state. */
+    *buf = *(const pb_byte_t*)stream->state;
+    stream->state = (pb_byte_t*)stream->state + 1;
+#endif
+
+    stream->bytes_left--;
+    
+    return true;    
+}
+
+/* Build an input stream that reads from a memory buffer.
+ *
+ * buf:    start of the encoded data; stored in stream.state.
+ * msglen: number of readable bytes; stored in stream.bytes_left.
+ *
+ * The callback is buf_read, or NULL in PB_BUFFER_ONLY builds. The error
+ * message starts out as NULL unless PB_NO_ERRMSG is defined.
+ * The stream is returned by value.
+ */
+pb_istream_t pb_istream_from_buffer(const pb_byte_t *buf, size_t msglen)
+{
+    pb_istream_t stream;
+    /* Cast away the const from buf without a compiler error.  We are
+     * careful to use it only in a const manner in the callbacks.
+     */
+    union {
+        void *state;
+        const void *c_state;
+    } state;
+#ifdef PB_BUFFER_ONLY
+    stream.callback = NULL;
+#else
+    stream.callback = &buf_read;
+#endif
+    state.c_state = buf;
+    stream.state = state.state;
+    stream.bytes_left = msglen;
+#ifndef PB_NO_ERRMSG
+    stream.errmsg = NULL;
+#endif
+    return stream;
+}
+
+/********************
+ * Helper functions *
+ ********************/
+
+/* Decode a varint into a 32-bit value.
+ *
+ * stream: input stream.
+ * dest:   receives the decoded value; written only on success.
+ * eof:    may be NULL. Set to true when reading the very first byte fails
+ *         and the stream has no bytes left. It is never set to false here.
+ *
+ * Bytes beyond the first 32 bits are accepted only if they carry no payload
+ * bits, or if they are the sign extension of a value whose bit 31 is set.
+ * Anything else, or a varint longer than 10 bytes, fails with
+ * "varint overflow".
+ */
+static bool checkreturn pb_decode_varint32_eof(pb_istream_t *stream, uint32_t *dest, bool *eof)
+{
+    pb_byte_t byte;
+    uint32_t result;
+    
+    if (!pb_readbyte(stream, &byte))
+    {
+        if (stream->bytes_left == 0)
+        {
+            if (eof)
+            {
+                *eof = true;
+            }
+        }
+
+        return false;
+    }
+    
+    if ((byte & 0x80) == 0)
+    {
+        /* Quick case, 1 byte value */
+        result = byte;
+    }
+    else
+    {
+        /* Multibyte case */
+        uint_fast8_t bitpos = 7;
+        result = byte & 0x7F;
+        
+        do
+        {
+            if (!pb_readbyte(stream, &byte))
+                return false;
+            
+            if (bitpos >= 32)
+            {
+                /* Note: The varint could have trailing 0x80 bytes, or 0xFF for negative. */
+                /* At bitpos 63 only one bit of a 64-bit value remains, hence 0x01. */
+                pb_byte_t sign_extension = (bitpos < 63) ? 0xFF : 0x01;
+                bool valid_extension = ((byte & 0x7F) == 0x00 ||
+                         ((result >> 31) != 0 && byte == sign_extension));
+
+                if (bitpos >= 64 || !valid_extension)
+                {
+                    PB_RETURN_ERROR(stream, "varint overflow");
+                }
+            }
+            else
+            {
+                result |= (uint32_t)(byte & 0x7F) << bitpos;
+            }
+            bitpos = (uint_fast8_t)(bitpos + 7);
+        } while (byte & 0x80);
+        
+        if (bitpos == 35 && (byte & 0x70) != 0)
+        {
+            /* The last byte was at bitpos=28, so only bottom 4 bits fit. */
+            PB_RETURN_ERROR(stream, "varint overflow");
+        }
+   }
+   
+   *dest = result;
+   return true;
+}
+
+/* Decode a varint into a 32-bit value. Same as pb_decode_varint32_eof()
+ * without end-of-stream reporting. */
+bool checkreturn pb_decode_varint32(pb_istream_t *stream, uint32_t *dest)
+{
+    bool decoded = pb_decode_varint32_eof(stream, dest, NULL);
+
+    return decoded;
+}
+
+#ifndef PB_WITHOUT_64BIT
+/* Decode a varint into a 64-bit value.
+ *
+ * Each byte contributes its low 7 bits, least significant group first.
+ * Fails with "varint overflow" if a continuation bit is still set after
+ * ten bytes, and returns false if a byte cannot be read. *dest is written
+ * only on success.
+ *
+ * NOTE(review): the tenth byte is shifted by 63, so any payload bits in it
+ * above bit 0 are shifted out without raising an error.
+ */
+bool checkreturn pb_decode_varint(pb_istream_t *stream, uint64_t *dest)
+{
+    pb_byte_t byte;
+    uint_fast8_t bitpos = 0;
+    uint64_t result = 0;
+    
+    do
+    {
+        if (bitpos >= 64)
+            PB_RETURN_ERROR(stream, "varint overflow");
+        
+        if (!pb_readbyte(stream, &byte))
+            return false;
+
+        result |= (uint64_t)(byte & 0x7F) << bitpos;
+        bitpos = (uint_fast8_t)(bitpos + 7);
+    } while (byte & 0x80);
+    
+    *dest = result;
+    return true;
+}
+#endif
+
+/* Consume one varint from the stream without decoding it: bytes are read
+ * until one without the continuation bit (0x80) is seen. Returns false if
+ * a read fails. No limit is placed on the number of bytes consumed.
+ *
+ * Defined 'static' to match the forward declaration earlier in this file;
+ * the function has internal linkage either way. */
+static bool checkreturn pb_skip_varint(pb_istream_t *stream)
+{
+    pb_byte_t byte;
+    do
+    {
+        if (!pb_read(stream, &byte, 1))
+            return false;
+    } while (byte & 0x80);
+    return true;
+}
+
+/* Consume one length-delimited value from the stream: the varint length
+ * prefix is decoded and that many bytes are then discarded. Fails with
+ * "size too large" if the length does not fit in size_t, and returns false
+ * if the prefix or the payload cannot be read.
+ *
+ * Defined 'static' to match the forward declaration earlier in this file;
+ * the function has internal linkage either way. */
+static bool checkreturn pb_skip_string(pb_istream_t *stream)
+{
+    uint32_t length;
+    if (!pb_decode_varint32(stream, &length))
+        return false;
+    
+    /* Only relevant on platforms where size_t is narrower than 32 bits. */
+    if ((size_t)length != length)
+    {
+        PB_RETURN_ERROR(stream, "size too large");
+    }
+
+    return pb_read(stream, NULL, (size_t)length);
+}
+
+/* Decode a field key from the stream.
+ *
+ * The outputs are first reset (*tag = 0, *wire_type = 0, *eof = false).
+ * On success the low three bits of the key go to *wire_type and the
+ * remaining bits to *tag. On failure false is returned and *eof reflects
+ * what pb_decode_varint32_eof() reported.
+ */
+bool checkreturn pb_decode_tag(pb_istream_t *stream, pb_wire_type_t *wire_type, uint32_t *tag, bool *eof)
+{
+    uint32_t key;
+
+    *tag = 0;
+    *wire_type = (pb_wire_type_t) 0;
+    *eof = false;
+
+    if (pb_decode_varint32_eof(stream, &key, eof))
+    {
+        *wire_type = (pb_wire_type_t)(key & 7);
+        *tag = key >> 3;
+        return true;
+    }
+
+    return false;
+}
+
+/* Discard one field value of the given wire type from the stream.
+ * Varints and length-delimited values are skipped by their own helpers,
+ * fixed-width values by reading 8 or 4 bytes. Any other wire type fails
+ * with "invalid wire_type". */
+bool checkreturn pb_skip_field(pb_istream_t *stream, pb_wire_type_t wire_type)
+{
+    if (wire_type == PB_WT_VARINT)
+        return pb_skip_varint(stream);
+
+    if (wire_type == PB_WT_64BIT)
+        return pb_read(stream, NULL, 8);
+
+    if (wire_type == PB_WT_STRING)
+        return pb_skip_string(stream);
+
+    if (wire_type == PB_WT_32BIT)
+        return pb_read(stream, NULL, 4);
+
+    PB_RETURN_ERROR(stream, "invalid wire_type");
+}
+
+/* Read a raw value to buffer, for the purpose of passing it to callback as
+ * a substream. Size is maximum size on call, and actual size on return.
+ *
+ * Varints are copied byte by byte and fail with "varint overflow" once more
+ * than the given maximum would be needed. For the fixed-width wire types
+ * *size is set to 8 or 4 and that many bytes are read; the incoming maximum
+ * is not checked on those paths. Every other wire type, including
+ * PB_WT_STRING, fails with "invalid wire_type".
+ */
+static bool checkreturn read_raw_value(pb_istream_t *stream, pb_wire_type_t wire_type, pb_byte_t *buf, size_t *size)
+{
+    size_t max_size = *size;
+    switch (wire_type)
+    {
+        case PB_WT_VARINT:
+            *size = 0;
+            do
+            {
+                (*size)++;
+                if (*size > max_size)
+                    PB_RETURN_ERROR(stream, "varint overflow");
+
+                if (!pb_read(stream, buf, 1))
+                    return false;
+            } while (*buf++ & 0x80);
+            return true;
+            
+        case PB_WT_64BIT:
+            *size = 8;
+            return pb_read(stream, buf, 8);
+        
+        case PB_WT_32BIT:
+            *size = 4;
+            return pb_read(stream, buf, 4);
+        
+        case PB_WT_STRING:
+            /* Calling read_raw_value with a PB_WT_STRING is an error.
+             * Explicitly handle this case and fallthrough to default to avoid
+             * compiler warnings.
+             */
+
+        default: PB_RETURN_ERROR(stream, "invalid wire_type");
+    }
+}
+
+/* Read a varint length prefix from 'stream' and set up 'substream' as a
+ * copy of it limited to that many bytes. The parent's bytes_left is reduced
+ * by the same amount right away. Fails with "parent stream too short" if
+ * the prefix asks for more bytes than remain.
+ * The substream must later be closed with pb_close_string_substream().
+ */
+bool checkreturn pb_make_string_substream(pb_istream_t *stream, pb_istream_t *substream)
+{
+    uint32_t payload_len;
+
+    if (!pb_decode_varint32(stream, &payload_len))
+    {
+        return false;
+    }
+
+    *substream = *stream;
+
+    if (substream->bytes_left < payload_len)
+    {
+        PB_RETURN_ERROR(stream, "parent stream too short");
+    }
+
+    stream->bytes_left -= (size_t)payload_len;
+    substream->bytes_left = (size_t)payload_len;
+    return true;
+}
+
+/* Finish with a substream created by pb_make_string_substream(): any bytes
+ * still unread in the substream are discarded, then the read position (and
+ * the error message, unless PB_NO_ERRMSG is defined) is copied back to the
+ * parent stream. Returns false if discarding the remainder fails. */
+bool checkreturn pb_close_string_substream(pb_istream_t *stream, pb_istream_t *substream)
+{
+    if (substream->bytes_left != 0 &&
+        !pb_read(substream, NULL, substream->bytes_left))
+    {
+        return false;
+    }
+
+    stream->state = substream->state;
+
+#ifndef PB_NO_ERRMSG
+    stream->errmsg = substream->errmsg;
+#endif
+    return true;
+}
+
+/*************************
+ * Decode a single field *
+ *************************/
+
+/* Decode one value into field->pData according to the field's LTYPE.
+ *
+ * The wire type is validated first: scalar types accept their natural wire
+ * type or PB_WT_PACKED, while bytes, strings, submessages and fixed-length
+ * bytes require PB_WT_STRING. A mismatch fails with "wrong wire type" and
+ * an unknown LTYPE with "invalid field type". Otherwise the result of the
+ * type-specific decoder is returned.
+ */
+static bool checkreturn decode_basic_field(pb_istream_t *stream, pb_wire_type_t wire_type, pb_field_iter_t *field)
+{
+    switch (PB_LTYPE(field->type))
+    {
+        case PB_LTYPE_BOOL:
+            if (wire_type != PB_WT_VARINT && wire_type != PB_WT_PACKED)
+                PB_RETURN_ERROR(stream, "wrong wire type");
+
+            return pb_dec_bool(stream, field);
+
+        case PB_LTYPE_VARINT:
+        case PB_LTYPE_UVARINT:
+        case PB_LTYPE_SVARINT:
+            if (wire_type != PB_WT_VARINT && wire_type != PB_WT_PACKED)
+                PB_RETURN_ERROR(stream, "wrong wire type");
+
+            return pb_dec_varint(stream, field);
+
+        case PB_LTYPE_FIXED32:
+            if (wire_type != PB_WT_32BIT && wire_type != PB_WT_PACKED)
+                PB_RETURN_ERROR(stream, "wrong wire type");
+
+            return pb_decode_fixed32(stream, field->pData);
+
+        case PB_LTYPE_FIXED64:
+            if (wire_type != PB_WT_64BIT && wire_type != PB_WT_PACKED)
+                PB_RETURN_ERROR(stream, "wrong wire type");
+
+#ifdef PB_CONVERT_DOUBLE_FLOAT
+            /* The destination is only float-sized: convert the 64-bit value. */
+            if (field->data_size == sizeof(float))
+            {
+                return pb_decode_double_as_float(stream, (float*)field->pData);
+            }
+#endif
+
+#ifdef PB_WITHOUT_64BIT
+            PB_RETURN_ERROR(stream, "invalid data_size");
+#else
+            return pb_decode_fixed64(stream, field->pData);
+#endif
+
+        case PB_LTYPE_BYTES:
+            if (wire_type != PB_WT_STRING)
+                PB_RETURN_ERROR(stream, "wrong wire type");
+
+            return pb_dec_bytes(stream, field);
+
+        case PB_LTYPE_STRING:
+            if (wire_type != PB_WT_STRING)
+                PB_RETURN_ERROR(stream, "wrong wire type");
+
+            return pb_dec_string(stream, field);
+
+        case PB_LTYPE_SUBMESSAGE:
+        case PB_LTYPE_SUBMSG_W_CB:
+            if (wire_type != PB_WT_STRING)
+                PB_RETURN_ERROR(stream, "wrong wire type");
+
+            return pb_dec_submessage(stream, field);
+
+        case PB_LTYPE_FIXED_LENGTH_BYTES:
+            if (wire_type != PB_WT_STRING)
+                PB_RETURN_ERROR(stream, "wrong wire type");
+
+            return pb_dec_fixed_length_bytes(stream, field);
+
+        default:
+            PB_RETURN_ERROR(stream, "invalid field type");
+    }
+}
+
+/* Decode a field whose storage lives inside the message structure,
+ * dispatching on the field's HTYPE:
+ *  - REQUIRED: decode the value in place.
+ *  - OPTIONAL: set the presence flag behind pSize (if any), then decode.
+ *  - REPEATED: append to the array, either a whole packed run or a single
+ *              element; fails with "array overflow" when array_size would
+ *              be exceeded.
+ *  - ONEOF:    record the tag in the selector behind pSize and decode; a
+ *              submessage member that was not already selected is zeroed
+ *              and set to its defaults first.
+ * Any other HTYPE fails with "invalid field type".
+ */
+static bool checkreturn decode_static_field(pb_istream_t *stream, pb_wire_type_t wire_type, pb_field_iter_t *field)
+{
+    switch (PB_HTYPE(field->type))
+    {
+        case PB_HTYPE_REQUIRED:
+            return decode_basic_field(stream, wire_type, field);
+            
+        case PB_HTYPE_OPTIONAL:
+            if (field->pSize != NULL)
+                *(bool*)field->pSize = true;
+            return decode_basic_field(stream, wire_type, field);
+    
+        case PB_HTYPE_REPEATED:
+            if (wire_type == PB_WT_STRING
+                && PB_LTYPE(field->type) <= PB_LTYPE_LAST_PACKABLE)
+            {
+                /* Packed array */
+                bool status = true;
+                pb_istream_t substream;
+                pb_size_t *size = (pb_size_t*)field->pSize;
+                /* Continue after the elements that are already stored. */
+                field->pData = (char*)field->pField + field->data_size * (*size);
+
+                if (!pb_make_string_substream(stream, &substream))
+                    return false;
+
+                while (substream.bytes_left > 0 && *size < field->array_size)
+                {
+                    if (!decode_basic_field(&substream, PB_WT_PACKED, field))
+                    {
+                        status = false;
+                        break;
+                    }
+                    (*size)++;
+                    field->pData = (char*)field->pData + field->data_size;
+                }
+
+                /* NOTE(review): this is also reached when an element failed
+                 * to decode with bytes still unread; the parent stream then
+                 * gets "array overflow" unless it already had a message, and
+                 * the substream is not closed on this path. */
+                if (substream.bytes_left != 0)
+                    PB_RETURN_ERROR(stream, "array overflow");
+                if (!pb_close_string_substream(stream, &substream))
+                    return false;
+
+                return status;
+            }
+            else
+            {
+                /* Repeated field */
+                pb_size_t *size = (pb_size_t*)field->pSize;
+                field->pData = (char*)field->pField + field->data_size * (*size);
+
+                /* The count is incremented even when the overflow error is returned. */
+                if ((*size)++ >= field->array_size)
+                    PB_RETURN_ERROR(stream, "array overflow");
+
+                return decode_basic_field(stream, wire_type, field);
+            }
+
+        case PB_HTYPE_ONEOF:
+            if (PB_LTYPE_IS_SUBMSG(field->type) &&
+                *(pb_size_t*)field->pSize != field->tag)
+            {
+                /* We memset to zero so that any callbacks are set to NULL.
+                 * This is because the callbacks might otherwise have values
+                 * from some other union field.
+                 * If callbacks are needed inside oneof field, use .proto
+                 * option submsg_callback to have a separate callback function
+                 * that can set the fields before submessage is decoded.
+                 * pb_dec_submessage() will set any default values. */
+                memset(field->pData, 0, (size_t)field->data_size);
+
+                /* Set default values for the submessage fields. */
+                if (field->submsg_desc->default_value != NULL ||
+                    field->submsg_desc->field_callback != NULL ||
+                    field->submsg_desc->submsg_info[0] != NULL)
+                {
+                    pb_field_iter_t submsg_iter;
+                    if (pb_field_iter_begin(&submsg_iter, field->submsg_desc, field->pData))
+                    {
+                        if (!pb_message_set_to_defaults(&submsg_iter))
+                            PB_RETURN_ERROR(stream, "failed to set defaults");
+                    }
+                }
+            }
+            *(pb_size_t*)field->pSize = field->tag;
+
+            return decode_basic_field(stream, wire_type, field);
+
+        default:
+            PB_RETURN_ERROR(stream, "invalid field type");
+    }
+}
+
+#ifdef PB_ENABLE_MALLOC
+/* Create or grow the heap buffer whose address is stored in *pData.
+ * pData is the address of the pointer variable to update; the buffer is
+ * sized to hold array_size entries of data_size bytes each.
+ * Returns false, with an error set on the stream, when either size is
+ * zero, when the total byte count would overflow size_t, or when
+ * pb_realloc() fails. The stored pointer is only overwritten on success.
+ */
+static bool checkreturn allocate_field(pb_istream_t *stream, void *pData, size_t data_size, size_t array_size)
+{
+    void *block = *(void**)pData;
+
+    if (array_size == 0 || data_size == 0)
+        PB_RETURN_ERROR(stream, "invalid size");
+
+#ifdef __AVR__
+    /* AVR libc bug 53284 (http://savannah.nongnu.org/bugs/?53284):
+     * a realloc to exactly one byte can corrupt the malloc structures,
+     * so ask for two bytes instead.
+     */
+    if (array_size == 1 && data_size == 1)
+        data_size = 2;
+#endif
+
+    /* Guard the multiplication below against wrap-around. The division is
+     * only needed when one operand reaches into the upper half of the bits
+     * of size_t; two operands below that limit cannot overflow.
+     */
+    if (data_size >= ((size_t)1 << (sizeof(size_t) * 4)) ||
+        array_size >= ((size_t)1 << (sizeof(size_t) * 4)))
+    {
+        if (((size_t)-1) / array_size < data_size)
+            PB_RETURN_ERROR(stream, "size too large");
+    }
+
+    /* Allocate new storage or expand the previous allocation.
+     * On failure the old pointer stays in the structure, so the caller
+     * still has to free the message after an error return. */
+    block = pb_realloc(block, array_size * data_size);
+    if (block == NULL)
+        PB_RETURN_ERROR(stream, "realloc failed");
+
+    *(void**)pData = block;
+    return true;
+}
+
+/* Reset a freshly allocated item before it is decoded into.
+ * Submessage items are zero-filled over field->data_size bytes; string and
+ * bytes items are themselves pointers and are set to NULL. Items of any
+ * other type are left untouched. */
+static void initialize_pointer_field(void *pItem, pb_field_iter_t *field)
+{
+    if (PB_LTYPE_IS_SUBMSG(field->type))
+    {
+        /* Zero-fill so that any callbacks are set to NULL.
+         * Default values will be set by pb_dec_submessage(). */
+        memset(pItem, 0, field->data_size);
+        return;
+    }
+
+    if (PB_LTYPE(field->type) == PB_LTYPE_BYTES ||
+        PB_LTYPE(field->type) == PB_LTYPE_STRING)
+    {
+        *(void**)pItem = NULL;
+    }
+}
+#endif
+
+static bool checkreturn decode_pointer_field(pb_istream_t *stream, pb_wire_type_t wire_type, pb_field_iter_t *field)
+{   /* Decode one occurrence of a pointer-allocated field from the stream.
+     * Without PB_ENABLE_MALLOC this always fails with "no malloc support".
+     * Otherwise storage is obtained through allocate_field() (or by the
+     * string/bytes decoders themselves) before decode_basic_field() runs.
+     * Returns false on any allocation or decoding error. */
+#ifndef PB_ENABLE_MALLOC
+    PB_UNUSED(wire_type);
+    PB_UNUSED(field);
+    PB_RETURN_ERROR(stream, "no malloc support");
+#else
+    switch (PB_HTYPE(field->type))
+    {
+        case PB_HTYPE_REQUIRED:
+        case PB_HTYPE_OPTIONAL:
+        case PB_HTYPE_ONEOF:
+            if (PB_LTYPE_IS_SUBMSG(field->type) && *(void**)field->pField != NULL)
+            {
+                /* Duplicate field, have to release the old allocation first. */
+                /* FIXME: Does this work correctly for oneofs? */
+                pb_release_single_field(field);
+            }
+        
+            if (PB_HTYPE(field->type) == PB_HTYPE_ONEOF)
+            {
+                *(pb_size_t*)field->pSize = field->tag; /* record which oneof member is now stored */
+            }
+
+            if (PB_LTYPE(field->type) == PB_LTYPE_STRING ||
+                PB_LTYPE(field->type) == PB_LTYPE_BYTES)
+            {
+                /* pb_dec_string and pb_dec_bytes handle allocation themselves,
+                 * so they are handed the address of the pointer variable
+                 * rather than the address of allocated data. */
+                field->pData = field->pField;
+                return decode_basic_field(stream, wire_type, field);
+            }
+            else
+            {
+                if (!allocate_field(stream, field->pField, field->data_size, 1))
+                    return false;
+                
+                field->pData = *(void**)field->pField;
+                initialize_pointer_field(field->pData, field);
+                return decode_basic_field(stream, wire_type, field);
+            }
+    
+        case PB_HTYPE_REPEATED:
+            if (wire_type == PB_WT_STRING
+                && PB_LTYPE(field->type) <= PB_LTYPE_LAST_PACKABLE)
+            {
+                /* Packed array, multiple items come in at once.
+                 * allocated_size starts at the current item count, so the
+                 * first item decoded here always triggers an allocation. */
+                bool status = true;
+                pb_size_t *size = (pb_size_t*)field->pSize;
+                size_t allocated_size = *size;
+                pb_istream_t substream;
+                
+                if (!pb_make_string_substream(stream, &substream))
+                    return false;
+                
+                while (substream.bytes_left)
+                {
+                    if (*size == PB_SIZE_MAX)
+                    {
+#ifndef PB_NO_ERRMSG
+                        stream->errmsg = "too many array entries";
+#endif
+                        status = false;
+                        break;
+                    }
+
+                    if ((size_t)*size + 1 > allocated_size)
+                    {
+                        /* Allocate more storage. This tries to guess the
+                         * number of remaining entries. Round the division
+                         * upwards. If the guess would reach PB_SIZE_MAX,
+                         * grow by a single entry instead. */
+                        size_t remain = (substream.bytes_left - 1) / field->data_size + 1;
+                        if (remain < PB_SIZE_MAX - allocated_size)
+                            allocated_size += remain;
+                        else
+                            allocated_size += 1;
+                        
+                        if (!allocate_field(&substream, field->pField, field->data_size, allocated_size))
+                        {
+                            status = false;
+                            break;
+                        }
+                    }
+
+                    /* Decode the array entry into the slot just past the
+                     * current count; the count is bumped only on success. */
+                    field->pData = *(char**)field->pField + field->data_size * (*size);
+                    initialize_pointer_field(field->pData, field);
+                    if (!decode_basic_field(&substream, PB_WT_PACKED, field))
+                    {
+                        status = false;
+                        break;
+                    }
+                    
+                    (*size)++;
+                }
+                if (!pb_close_string_substream(stream, &substream))
+                    return false;
+                
+                return status;
+            }
+            else
+            {
+                /* Normal repeated field, i.e. only one item at a time.
+                 * The array is grown by exactly one entry per occurrence and
+                 * the count is incremented before the item is decoded. */
+                pb_size_t *size = (pb_size_t*)field->pSize;
+
+                if (*size == PB_SIZE_MAX)
+                    PB_RETURN_ERROR(stream, "too many array entries");
+                
+                if (!allocate_field(stream, field->pField, field->data_size, (size_t)(*size + 1)))
+                    return false;
+            
+                field->pData = *(char**)field->pField + field->data_size * (*size);
+                (*size)++;
+                initialize_pointer_field(field->pData, field);
+                return decode_basic_field(stream, wire_type, field);
+            }
+
+        default:
+            PB_RETURN_ERROR(stream, "invalid field type");
+    }
+#endif
+}
+
+/* Hand a callback-type field to the message descriptor's field_callback.
+ * Without a registered callback the field is skipped.
+ * Length-delimited data is exposed through a substream and the callback is
+ * invoked repeatedly while it keeps consuming bytes and data remains. Any
+ * other wire type is first copied into a small stack buffer, so that the
+ * callback sees a stream limited to that single value. */
+static bool checkreturn decode_callback_field(pb_istream_t *stream, pb_wire_type_t wire_type, pb_field_iter_t *field)
+{
+    pb_istream_t substream;
+
+    if (!field->descriptor->field_callback)
+        return pb_skip_field(stream, wire_type);
+
+    if (wire_type != PB_WT_STRING)
+    {
+        /* Copy the single scalar value to the stack. Limiting the stream
+         * length this way allows the same callback to serve packed and
+         * not-packed fields. */
+        pb_byte_t scratch[10];
+        size_t length = sizeof(scratch);
+
+        if (!read_raw_value(stream, wire_type, scratch, &length))
+            return false;
+
+        substream = pb_istream_from_buffer(scratch, length);
+        return field->descriptor->field_callback(&substream, NULL, field);
+    }
+
+    if (!pb_make_string_substream(stream, &substream))
+        return false;
+
+    for (;;)
+    {
+        const size_t before = substream.bytes_left;
+
+        if (!field->descriptor->field_callback(&substream, NULL, field))
+            PB_RETURN_ERROR(stream, "callback failed");
+
+        /* Stop once everything is consumed or the callback made no progress. */
+        if (substream.bytes_left == 0 || substream.bytes_left >= before)
+            break;
+    }
+
+    return pb_close_string_substream(stream, &substream);
+}
+
+/* Dispatch decoding of one field according to its allocation type
+ * (static, pointer or callback). With PB_ENABLE_MALLOC, a oneof member
+ * first goes through pb_release_union_field() so that data left behind by
+ * a different member is released before it gets overwritten. */
+static bool checkreturn decode_field(pb_istream_t *stream, pb_wire_type_t wire_type, pb_field_iter_t *field)
+{
+#ifdef PB_ENABLE_MALLOC
+    if (PB_HTYPE(field->type) == PB_HTYPE_ONEOF &&
+        !pb_release_union_field(stream, field))
+    {
+        return false;
+    }
+#endif
+
+    if (PB_ATYPE(field->type) == PB_ATYPE_STATIC)
+        return decode_static_field(stream, wire_type, field);
+
+    if (PB_ATYPE(field->type) == PB_ATYPE_POINTER)
+        return decode_pointer_field(stream, wire_type, field);
+
+    if (PB_ATYPE(field->type) == PB_ATYPE_CALLBACK)
+        return decode_callback_field(stream, wire_type, field);
+
+    PB_RETURN_ERROR(stream, "invalid field type");
+}
+
+/* Fallback decoder for extension fields that have no custom decode hook.
+ * Expects extension->type->arg to hold a pb_msgdesc_t pointer describing
+ * a message with only one field in it.
+ * A tag that does not belong to this extension is not an error: the
+ * function returns true without reading from the stream. */
+static bool checkreturn default_extension_decoder(pb_istream_t *stream,
+    pb_extension_t *extension, uint32_t tag, pb_wire_type_t wire_type)
+{
+    pb_field_iter_t ext_field;
+
+    if (!pb_field_iter_begin_extension(&ext_field, extension))
+        PB_RETURN_ERROR(stream, "invalid extension");
+
+    if (ext_field.tag == tag && ext_field.message)
+    {
+        extension->found = true;
+        return decode_field(stream, wire_type, &ext_field);
+    }
+
+    return true;
+}
+
+/* Offer an unknown field to each extension in the linked list.
+ * The walk stops as soon as one decoder consumes data from the stream
+ * (detected through a change in bytes_left) or the list ends. Returns
+ * false only when a decoder reports failure. */
+static bool checkreturn decode_extension(pb_istream_t *stream,
+    uint32_t tag, pb_wire_type_t wire_type, pb_extension_t *extension)
+{
+    const size_t start_left = stream->bytes_left;
+
+    for (; extension != NULL && stream->bytes_left == start_left;
+         extension = extension->next)
+    {
+        /* Prefer the extension's own decoder, fall back to the default one. */
+        const bool ok = extension->type->decode
+            ? extension->type->decode(stream, extension, tag, wire_type)
+            : default_extension_decoder(stream, extension, tag, wire_type);
+
+        if (!ok)
+            return false;
+    }
+
+    return true;
+}
+
+/* Initialize message fields to default values, recursively.
+ * Handles the single field the iterator currently points at:
+ *  - extension fields: every extension in the linked list is marked as
+ *    not found and reset through pb_message_set_to_defaults();
+ *  - static fields: the presence flag / count / which_ value is cleared
+ *    and, except for repeated and oneof fields, the data is zeroed or the
+ *    submessage is reset recursively;
+ *  - pointer fields: the pointer is set to NULL (nothing is freed here);
+ *  - callback fields: left untouched.
+ * Returns false only if a nested pb_message_set_to_defaults() call fails. */
+static bool pb_field_set_to_default(pb_field_iter_t *field)
+{
+    pb_type_t type;
+    type = field->type;
+
+    if (PB_LTYPE(type) == PB_LTYPE_EXTENSION)
+    {
+        pb_extension_t *ext = *(pb_extension_t* const *)field->pData;
+        while (ext != NULL)
+        {
+            pb_field_iter_t ext_iter;
+            if (pb_field_iter_begin_extension(&ext_iter, ext))
+            {
+                ext->found = false;
+                if (!pb_message_set_to_defaults(&ext_iter))
+                    return false;
+            }
+            ext = ext->next;
+        }
+    }
+    else if (PB_ATYPE(type) == PB_ATYPE_STATIC)
+    {
+        bool init_data = true;
+        if (PB_HTYPE(type) == PB_HTYPE_OPTIONAL && field->pSize != NULL)
+        {
+            /* Set has_field to false. Still initialize the optional field
+             * itself also. */
+            *(bool*)field->pSize = false;
+        }
+        else if (PB_HTYPE(type) == PB_HTYPE_REPEATED ||
+                 PB_HTYPE(type) == PB_HTYPE_ONEOF)
+        {
+            /* REPEATED: Set array count to 0, no need to initialize contents.
+               ONEOF: Set which_field to 0. */
+            *(pb_size_t*)field->pSize = 0;
+            init_data = false;
+        }
+
+        if (init_data)
+        {
+            if (PB_LTYPE_IS_SUBMSG(field->type) &&
+                (field->submsg_desc->default_value != NULL ||
+                 field->submsg_desc->field_callback != NULL ||
+                 field->submsg_desc->submsg_info[0] != NULL))
+            {
+                /* Initialize submessage to defaults.
+                 * Only needed if it has default values
+                 * or callback/submessage fields. */
+                pb_field_iter_t submsg_iter;
+                if (pb_field_iter_begin(&submsg_iter, field->submsg_desc, field->pData))
+                {
+                    if (!pb_message_set_to_defaults(&submsg_iter))
+                        return false;
+                }
+            }
+            else
+            {
+                /* Initialize to zeros */
+                memset(field->pData, 0, (size_t)field->data_size);
+            }
+        }
+    }
+    else if (PB_ATYPE(type) == PB_ATYPE_POINTER)
+    {
+        /* Initialize the pointer to NULL. */
+        *(void**)field->pField = NULL;
+
+        /* Initialize array count to 0 (for oneof fields this clears the
+         * which_ value instead). */
+        if (PB_HTYPE(type) == PB_HTYPE_REPEATED ||
+            PB_HTYPE(type) == PB_HTYPE_ONEOF)
+        {
+            *(pb_size_t*)field->pSize = 0;
+        }
+    }
+    else if (PB_ATYPE(type) == PB_ATYPE_CALLBACK)
+    {
+        /* Don't overwrite callback */
+    }
+
+    return true;
+}
+
+/* Reset every field the iterator walks over, applying default values.
+ * Each field is first cleared through pb_field_set_to_default(). If the
+ * descriptor has a default_value buffer, it is read as an encoded stream
+ * of tag/value pairs: whenever the pending tag equals the current field's
+ * tag, the value is decoded into the field and the next tag is fetched.
+ * After a default has been applied, a non-NULL pSize is reset to false.
+ * Returns false if clearing or decoding fails. */
+static bool pb_message_set_to_defaults(pb_field_iter_t *iter)
+{
+    pb_istream_t defaults = PB_ISTREAM_EMPTY;
+    pb_wire_type_t next_wire_type = PB_WT_VARINT;
+    uint32_t next_tag = 0;
+    bool at_end;
+
+    if (iter->descriptor->default_value)
+    {
+        defaults = pb_istream_from_buffer(iter->descriptor->default_value, (size_t)-1);
+        if (!pb_decode_tag(&defaults, &next_wire_type, &next_tag, &at_end))
+            return false;
+    }
+
+    do
+    {
+        if (!pb_field_set_to_default(iter))
+            return false;
+
+        /* Nothing more to do unless the default stream has a value
+         * pending for exactly this field. */
+        if (next_tag == 0 || iter->tag != next_tag)
+            continue;
+
+        if (!decode_field(&defaults, next_wire_type, iter))
+            return false;
+
+        if (!pb_decode_tag(&defaults, &next_wire_type, &next_tag, &at_end))
+            return false;
+
+        if (iter->pSize)
+            *(bool*)iter->pSize = false;
+    } while (pb_field_iter_next(iter));
+
+    return true;
+}
+
+/*********************
+ * Decode all fields *
+ *********************/
+
+static bool checkreturn pb_decode_inner(pb_istream_t *stream, const pb_msgdesc_t *fields, void *dest_struct, unsigned int flags)
+{   /* Core decoding loop shared by pb_decode() and pb_decode_ex().
+     * Unless PB_DECODE_NOINIT is set, the destination is first reset to
+     * defaults. Tags are then read until the stream is exhausted (or a zero
+     * tag arrives with PB_DECODE_NULLTERMINATED); known fields are decoded,
+     * unknown ones are offered to extensions and otherwise skipped.
+     * Afterwards the fixed-count array and required-field checks run.
+     * This function does not release allocations on failure; the visible
+     * callers pb_decode() and pb_decode_ex() do that. */
+    uint32_t extension_range_start = 0;
+    pb_extension_t *extensions = NULL;
+
+    /* 'fixed_count_field' and 'fixed_count_size' track position of a repeated fixed
+     * count field. This can only handle _one_ repeated fixed count field that
+     * is unpacked and unordered among other (non repeated fixed count) fields.
+     */
+    pb_size_t fixed_count_field = PB_SIZE_MAX;
+    pb_size_t fixed_count_size = 0;
+    pb_size_t fixed_count_total_size = 0;
+
+    pb_fields_seen_t fields_seen = {{0, 0}};
+    const uint32_t allbits = ~(uint32_t)0;
+    pb_field_iter_t iter;
+
+    if (pb_field_iter_begin(&iter, fields, dest_struct))
+    {
+        if ((flags & PB_DECODE_NOINIT) == 0)
+        {
+            if (!pb_message_set_to_defaults(&iter))
+                PB_RETURN_ERROR(stream, "failed to set defaults");
+        }
+    }
+
+    while (stream->bytes_left)
+    {
+        uint32_t tag;
+        pb_wire_type_t wire_type;
+        bool eof;
+
+        if (!pb_decode_tag(stream, &wire_type, &tag, &eof))
+        {
+            if (eof)
+                break;
+            else
+                return false;
+        }
+
+        if (tag == 0)
+        {
+          if (flags & PB_DECODE_NULLTERMINATED)
+          {
+            break;
+          }
+          else
+          {
+            PB_RETURN_ERROR(stream, "zero tag");
+          }
+        }
+
+        if (!pb_field_iter_find(&iter, tag) || PB_LTYPE(iter.type) == PB_LTYPE_EXTENSION)
+        {
+            /* No match found, check if it matches an extension.
+             * The extension lookup is done once and cached: a start of 0
+             * means "not looked up yet", (uint32_t)-1 means "none". */
+            if (extension_range_start == 0)
+            {
+                if (pb_field_iter_find_extension(&iter))
+                {
+                    extensions = *(pb_extension_t* const *)iter.pData;
+                    extension_range_start = iter.tag;
+                }
+
+                if (!extensions)
+                {
+                    extension_range_start = (uint32_t)-1;
+                }
+            }
+
+            if (tag >= extension_range_start)
+            {
+                size_t pos = stream->bytes_left;
+
+                if (!decode_extension(stream, tag, wire_type, extensions))
+                    return false;
+
+                if (pos != stream->bytes_left)
+                {
+                    /* The field was handled */
+                    continue;
+                }
+            }
+
+            /* No match found, skip data */
+            if (!pb_skip_field(stream, wire_type))
+                return false;
+            continue;
+        }
+
+        /* If a repeated fixed count field was found, get size from
+         * 'fixed_count_field' as there is no counter contained in the struct.
+         */
+        if (PB_HTYPE(iter.type) == PB_HTYPE_REPEATED && iter.pSize == &iter.array_size)
+        {
+            if (fixed_count_field != iter.index) {
+                /* If the new fixed count field does not match the previous one,
+                 * check that the previous one is NULL or that it finished
+                 * receiving all the expected data.
+                 */
+                if (fixed_count_field != PB_SIZE_MAX &&
+                    fixed_count_size != fixed_count_total_size)
+                {
+                    PB_RETURN_ERROR(stream, "wrong size for fixed count field");
+                }
+
+                fixed_count_field = iter.index;
+                fixed_count_size = 0;
+                fixed_count_total_size = iter.array_size;
+            }
+
+            iter.pSize = &fixed_count_size; /* count into the local counter instead */
+        }
+
+        if (PB_HTYPE(iter.type) == PB_HTYPE_REQUIRED
+            && iter.required_field_index < PB_MAX_REQUIRED_FIELDS)
+        {
+            /* Mark the field as seen: one bit per required field, 32 per word. */
+            uint32_t tmp = ((uint32_t)1 << (iter.required_field_index & 31));
+            fields_seen.bitfield[iter.required_field_index >> 5] |= tmp;
+        }
+
+        if (!decode_field(stream, wire_type, &iter))
+            return false;
+    }
+
+    /* Check that all elements of the last decoded fixed count field were present. */
+    if (fixed_count_field != PB_SIZE_MAX &&
+        fixed_count_size != fixed_count_total_size)
+    {
+        PB_RETURN_ERROR(stream, "wrong size for fixed count field");
+    }
+
+    /* Check that all required fields were present.
+     * Only the first PB_MAX_REQUIRED_FIELDS required fields are tracked. */
+    {
+        pb_size_t req_field_count = iter.descriptor->required_field_count;
+
+        if (req_field_count > 0)
+        {
+            pb_size_t i;
+
+            if (req_field_count > PB_MAX_REQUIRED_FIELDS)
+                req_field_count = PB_MAX_REQUIRED_FIELDS;
+
+            /* Check the whole words */
+            for (i = 0; i < (req_field_count >> 5); i++)
+            {
+                if (fields_seen.bitfield[i] != allbits)
+                    PB_RETURN_ERROR(stream, "missing required field");
+            }
+
+            /* Check the remaining bits (if any) */
+            if ((req_field_count & 31) != 0)
+            {
+                if (fields_seen.bitfield[req_field_count >> 5] !=
+                    (allbits >> (uint_least8_t)(32 - (req_field_count & 31))))
+                {
+                    PB_RETURN_ERROR(stream, "missing required field");
+                }
+            }
+        }
+    }
+
+    return true;
+}
+
+/* Decode a message with extended options selected through 'flags'.
+ * With PB_DECODE_DELIMITED the message is read from a length-prefixed
+ * substream; otherwise it is decoded directly from 'stream'. The flags
+ * are passed on to pb_decode_inner() unchanged.
+ * Returns true on success. When PB_ENABLE_MALLOC is defined, a failed
+ * decode releases the destination structure via pb_release() before
+ * returning false.
+ */
+bool checkreturn pb_decode_ex(pb_istream_t *stream, const pb_msgdesc_t *fields, void *dest_struct, unsigned int flags)
+{
+    bool status;
+
+    if ((flags & PB_DECODE_DELIMITED) == 0)
+    {
+      status = pb_decode_inner(stream, fields, dest_struct, flags);
+    }
+    else
+    {
+      pb_istream_t substream;
+      if (!pb_make_string_substream(stream, &substream))
+        return false;
+
+      status = pb_decode_inner(&substream, fields, dest_struct, flags);
+
+      /* Record the failure instead of returning right away: the message
+       * may already own heap allocations, and an early return would skip
+       * the pb_release() below and leak them. */
+      if (!pb_close_string_substream(stream, &substream))
+        status = false;
+    }
+
+#ifdef PB_ENABLE_MALLOC
+    if (!status)
+        pb_release(fields, dest_struct);
+#endif
+
+    return status;
+}
+
+/* Decode a complete message from 'stream' into 'dest_struct' with no
+ * option flags set. When PB_ENABLE_MALLOC is defined, a failed decode
+ * releases the destination structure via pb_release() before returning
+ * false. */
+bool checkreturn pb_decode(pb_istream_t *stream, const pb_msgdesc_t *fields, void *dest_struct)
+{
+    const bool ok = pb_decode_inner(stream, fields, dest_struct, 0);
+
+#ifdef PB_ENABLE_MALLOC
+    if (!ok)
+    {
+        pb_release(fields, dest_struct);
+    }
+#endif
+
+    return ok;
+}
+
+#ifdef PB_ENABLE_MALLOC
+/* Prepare a oneof for receiving 'field'. If the union currently holds a
+ * different member, that member is released first. Nothing happens when
+ * the union is empty or already holds the same member (the new data is
+ * then merged). Returns false if the stored which_ value does not name
+ * any field of the message. */
+static bool pb_release_union_field(pb_istream_t *stream, pb_field_iter_t *field)
+{
+    pb_field_iter_t previous = *field;
+    pb_size_t stored_which = *(pb_size_t*)field->pSize; /* which_ value before decoding */
+    pb_size_t incoming_which = field->tag;              /* which_ value after decoding */
+
+    if (stored_which == 0 || stored_which == incoming_which)
+        return true;
+
+    /* Locate the member that is stored right now. The find can fail if
+     * the message struct contains invalid data. */
+    if (!pb_field_iter_find(&previous, stored_which))
+        PB_RETURN_ERROR(stream, "invalid union tag");
+
+    pb_release_single_field(&previous);
+
+    if (PB_ATYPE(field->type) == PB_ATYPE_POINTER)
+    {
+        /* Leave a valid NULL pointer behind so the structure stays
+         * consistent even in case of error return. */
+        *(void**)field->pField = NULL;
+        field->pData = NULL;
+    }
+
+    return true;
+}
+
+static void pb_release_single_field(pb_field_iter_t *field)
+{   /* Release everything owned by the field the iterator points at.
+     * Nested data (extensions, submessages) is released first, then the
+     * entries of a repeated string/bytes array, and finally the field's own
+     * allocation. For a oneof member that is not the active one, nothing
+     * is done. Note that field->pData is modified along the way. */
+    pb_type_t type;
+    type = field->type;
+
+    if (PB_HTYPE(type) == PB_HTYPE_ONEOF)
+    {
+        if (*(pb_size_t*)field->pSize != field->tag)
+            return; /* This is not the current field in the union */
+    }
+
+    /* Release anything contained inside an extension or submsg.
+     * This has to be done even if the submsg itself is statically
+     * allocated. */
+    if (PB_LTYPE(type) == PB_LTYPE_EXTENSION)
+    {
+        /* Release fields from all extensions in the linked list */
+        pb_extension_t *ext = *(pb_extension_t**)field->pData;
+        while (ext != NULL)
+        {
+            pb_field_iter_t ext_iter;
+            if (pb_field_iter_begin_extension(&ext_iter, ext))
+            {
+                pb_release_single_field(&ext_iter);
+            }
+            ext = ext->next;
+        }
+    }
+    else if (PB_LTYPE_IS_SUBMSG(type) && PB_ATYPE(type) != PB_ATYPE_CALLBACK)
+    {
+        /* Release fields in submessage or submsg array */
+        pb_size_t count = 1;
+        
+        if (PB_ATYPE(type) == PB_ATYPE_POINTER)
+        {
+            field->pData = *(void**)field->pField;
+        }
+        else
+        {
+            field->pData = field->pField;
+        }
+        
+        if (PB_HTYPE(type) == PB_HTYPE_REPEATED)
+        {
+            count = *(pb_size_t*)field->pSize;
+
+            if (PB_ATYPE(type) == PB_ATYPE_STATIC && count > field->array_size)
+            {
+                /* Protect against corrupted _count fields */
+                count = field->array_size;
+            }
+        }
+        
+        if (field->pData)
+        {
+            for (; count > 0; count--)
+            {
+                pb_release(field->submsg_desc, field->pData);
+                field->pData = (char*)field->pData + field->data_size; /* step to the next array entry */
+            }
+        }
+    }
+    
+    if (PB_ATYPE(type) == PB_ATYPE_POINTER)
+    {
+        if (PB_HTYPE(type) == PB_HTYPE_REPEATED &&
+            (PB_LTYPE(type) == PB_LTYPE_STRING ||
+             PB_LTYPE(type) == PB_LTYPE_BYTES))
+        {
+            /* Release entries in repeated string or bytes array.
+             * Each entry is itself a pointer to a separate allocation. */
+            void **pItem = *(void***)field->pField;
+            pb_size_t count = *(pb_size_t*)field->pSize;
+            for (; count > 0; count--)
+            {
+                pb_free(*pItem);
+                *pItem++ = NULL;
+            }
+        }
+        
+        if (PB_HTYPE(type) == PB_HTYPE_REPEATED)
+        {
+            /* We are going to release the array, so set the size to 0 */
+            *(pb_size_t*)field->pSize = 0;
+        }
+        
+        /* Release main pointer and leave NULL behind so that a repeated
+         * release does not free it again. */
+        pb_free(*(void**)field->pField);
+        *(void**)field->pField = NULL;
+    }
+}
+
+/* Release the dynamically allocated data held by a message structure.
+ * Every field of the descriptor is visited and released through
+ * pb_release_single_field(). A NULL dest_struct is ignored, similar to
+ * free(); a message type without fields needs no work either. */
+void pb_release(const pb_msgdesc_t *fields, void *dest_struct)
+{
+    pb_field_iter_t cursor;
+
+    if (dest_struct == NULL || !pb_field_iter_begin(&cursor, fields, dest_struct))
+    {
+        return;
+    }
+
+    do
+    {
+        pb_release_single_field(&cursor);
+    }
+    while (pb_field_iter_next(&cursor));
+}
+#endif
+
+/* Field decoders */
+
+/* Decode a varint-encoded boolean into *dest.
+ * Any non-zero value is stored as true. Returns false if the varint
+ * cannot be read from the stream. */
+bool pb_decode_bool(pb_istream_t *stream, bool *dest)
+{
+    uint32_t raw;
+
+    if (pb_decode_varint32(stream, &raw))
+    {
+        *dest = (raw != 0) ? true : false;
+        return true;
+    }
+
+    return false;
+}
+
+/* Decode a zigzag-encoded signed varint into *dest.
+ * Returns false if the underlying varint cannot be read. */
+bool pb_decode_svarint(pb_istream_t *stream, pb_int64_t *dest)
+{
+    pb_uint64_t zigzag;
+
+    if (!pb_decode_varint(stream, &zigzag))
+        return false;
+
+    /* The low bit selects the sign; the remaining bits hold the value,
+     * bit-inverted for negative numbers. */
+    *dest = (zigzag & 1) ? (pb_int64_t)(~(zigzag >> 1))
+                         : (pb_int64_t)(zigzag >> 1);
+    return true;
+}
+
+/* Read four bytes from the stream and store them at 'dest' as a uint32_t,
+ * interpreting the bytes as little-endian. Returns false if the bytes
+ * cannot be read. */
+bool pb_decode_fixed32(pb_istream_t *stream, void *dest)
+{
+    union {
+        uint32_t fixed32;
+        pb_byte_t bytes[4];
+    } raw;
+
+    if (!pb_read(stream, raw.bytes, 4))
+        return false;
+
+#if defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN && CHAR_BIT == 8
+    /* Known little-endian host: the bytes are already in native order. */
+    *(uint32_t*)dest = raw.fixed32;
+#else
+    {
+        /* Assemble the value starting from the most significant byte. */
+        uint32_t value = 0;
+        int i;
+
+        for (i = 3; i >= 0; i--)
+            value = (value << 8) | (uint32_t)raw.bytes[i];
+
+        *(uint32_t*)dest = value;
+    }
+#endif
+    return true;
+}
+
+#ifndef PB_WITHOUT_64BIT
+/* Read eight bytes from the stream and store them at 'dest' as a
+ * uint64_t, interpreting the bytes as little-endian. Returns false if the
+ * bytes cannot be read. Not compiled when PB_WITHOUT_64BIT is defined. */
+bool pb_decode_fixed64(pb_istream_t *stream, void *dest)
+{
+    union {
+        uint64_t fixed64;
+        pb_byte_t bytes[8];
+    } raw;
+
+    if (!pb_read(stream, raw.bytes, 8))
+        return false;
+
+#if defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN && CHAR_BIT == 8
+    /* Known little-endian host: the bytes are already in native order. */
+    *(uint64_t*)dest = raw.fixed64;
+#else
+    {
+        /* Assemble the value starting from the most significant byte. */
+        uint64_t value = 0;
+        int i;
+
+        for (i = 7; i >= 0; i--)
+            value = (value << 8) | (uint64_t)raw.bytes[i];
+
+        *(uint64_t*)dest = value;
+    }
+#endif
+    return true;
+}
+#endif
+
+/* Field-level decoder for bool fields: decodes into the storage that
+ * field->pData points at. */
+static bool checkreturn pb_dec_bool(pb_istream_t *stream, const pb_field_iter_t *field)
+{
+    bool *target = (bool*)field->pData;
+
+    return pb_decode_bool(stream, target);
+}
+
+static bool checkreturn pb_dec_varint(pb_istream_t *stream, const pb_field_iter_t *field)
+{   /* Decode a varint-encoded integer field and store it at field->pData.
+     * The store width is selected by comparing field->data_size against the
+     * sizes of the supported integer types, widest first. A value that does
+     * not survive the narrowing cast is rejected with "integer too large";
+     * an unsupported data_size yields "invalid data_size". */
+    if (PB_LTYPE(field->type) == PB_LTYPE_UVARINT)
+    {
+        pb_uint64_t value, clamped;
+        if (!pb_decode_varint(stream, &value))
+            return false;
+
+        /* Cast to the proper field size, while checking for overflows.
+         * 'clamped' receives the value as actually stored. */
+        if (field->data_size == sizeof(pb_uint64_t))
+            clamped = *(pb_uint64_t*)field->pData = value;
+        else if (field->data_size == sizeof(uint32_t))
+            clamped = *(uint32_t*)field->pData = (uint32_t)value;
+        else if (field->data_size == sizeof(uint_least16_t))
+            clamped = *(uint_least16_t*)field->pData = (uint_least16_t)value;
+        else if (field->data_size == sizeof(uint_least8_t))
+            clamped = *(uint_least8_t*)field->pData = (uint_least8_t)value;
+        else
+            PB_RETURN_ERROR(stream, "invalid data_size");
+
+        if (clamped != value)
+            PB_RETURN_ERROR(stream, "integer too large");
+
+        return true;
+    }
+    else
+    {
+        pb_uint64_t value;
+        pb_int64_t svalue;
+        pb_int64_t clamped;
+
+        if (PB_LTYPE(field->type) == PB_LTYPE_SVARINT)
+        {
+            if (!pb_decode_svarint(stream, &svalue))
+                return false;
+        }
+        else
+        {
+            if (!pb_decode_varint(stream, &value))
+                return false;
+
+            /* See issue 97: Google's C++ protobuf allows negative varint values to
+            * be cast as int32_t, instead of the int64_t that should be used when
+            * encoding. Nanopb versions before 0.2.5 had a bug in encoding. In order to
+            * not break decoding of such messages, we cast <=32 bit fields to
+            * int32_t first to get the sign correct.
+            */
+            if (field->data_size == sizeof(pb_int64_t))
+                svalue = (pb_int64_t)value;
+            else
+                svalue = (int32_t)value;
+        }
+
+        /* Cast to the proper field size, while checking for overflows.
+         * 'clamped' receives the value as actually stored. */
+        if (field->data_size == sizeof(pb_int64_t))
+            clamped = *(pb_int64_t*)field->pData = svalue;
+        else if (field->data_size == sizeof(int32_t))
+            clamped = *(int32_t*)field->pData = (int32_t)svalue;
+        else if (field->data_size == sizeof(int_least16_t))
+            clamped = *(int_least16_t*)field->pData = (int_least16_t)svalue;
+        else if (field->data_size == sizeof(int_least8_t))
+            clamped = *(int_least8_t*)field->pData = (int_least8_t)svalue;
+        else
+            PB_RETURN_ERROR(stream, "invalid data_size");
+
+        if (clamped != svalue)
+            PB_RETURN_ERROR(stream, "integer too large");
+
+        return true;
+    }
+}
+
+static bool checkreturn pb_dec_bytes(pb_istream_t *stream, const pb_field_iter_t *field)
+{   /* Decode a length-prefixed bytes field into a pb_bytes_array_t.
+     * For pointer fields, field->pData is the address of the pointer
+     * variable and the array is (re)allocated here; for other fields the
+     * data must fit into the field->data_size bytes at field->pData.
+     * Returns false on oversize input, allocation failure or read error. */
+    uint32_t size;
+    size_t alloc_size;
+    pb_bytes_array_t *dest;
+    
+    if (!pb_decode_varint32(stream, &size))
+        return false;
+    
+    if (size > PB_SIZE_MAX)
+        PB_RETURN_ERROR(stream, "bytes overflow");
+    
+    alloc_size = PB_BYTES_ARRAY_T_ALLOCSIZE(size);
+    if (size > alloc_size) /* the allocation size computation wrapped around */
+        PB_RETURN_ERROR(stream, "size too large");
+    
+    if (PB_ATYPE(field->type) == PB_ATYPE_POINTER)
+    {
+#ifndef PB_ENABLE_MALLOC
+        PB_RETURN_ERROR(stream, "no malloc support");
+#else
+        if (stream->bytes_left < size) /* reject before allocating for data that is not there */
+            PB_RETURN_ERROR(stream, "end-of-stream");
+
+        if (!allocate_field(stream, field->pData, alloc_size, 1))
+            return false;
+        dest = *(pb_bytes_array_t**)field->pData;
+#endif
+    }
+    else
+    {
+        if (alloc_size > field->data_size)
+            PB_RETURN_ERROR(stream, "bytes overflow");
+        dest = (pb_bytes_array_t*)field->pData;
+    }
+
+    dest->size = (pb_size_t)size;
+    return pb_read(stream, dest->bytes, (size_t)size);
+}
+
+/* Decode a length-prefixed 'string' field and null-terminate it.
+ *
+ * 'dest' starts out as field->pData and is redirected to the storage set up
+ * by allocate_field() for pointer fields. One extra byte is always reserved
+ * for the terminator. When PB_VALIDATE_UTF8 is defined, the decoded text is
+ * additionally checked with pb_validate_utf8().
+ */
+static bool checkreturn pb_dec_string(pb_istream_t *stream, const pb_field_iter_t *field)
+{
+    uint32_t size;
+    size_t alloc_size;
+    pb_byte_t *dest = (pb_byte_t*)field->pData;
+
+    if (!pb_decode_varint32(stream, &size))
+        return false;
+
+    /* size + 1 below is computed in uint32_t and would wrap to 0 for this value */
+    if (size == (uint32_t)-1)
+        PB_RETURN_ERROR(stream, "size too large");
+
+    /* Space for null terminator */
+    alloc_size = (size_t)(size + 1);
+
+    /* Second guard: the value converted to size_t must not be smaller than size */
+    if (alloc_size < size)
+        PB_RETURN_ERROR(stream, "size too large");
+
+    if (PB_ATYPE(field->type) == PB_ATYPE_POINTER)
+    {
+#ifndef PB_ENABLE_MALLOC
+        PB_RETURN_ERROR(stream, "no malloc support");
+#else
+        /* Verify that the stream still holds 'size' bytes before requesting
+         * storage for them. */
+        if (stream->bytes_left < size)
+            PB_RETURN_ERROR(stream, "end-of-stream");
+
+        if (!allocate_field(stream, field->pData, alloc_size, 1))
+            return false;
+        /* For pointer fields pData holds a pointer to the string pointer */
+        dest = *(pb_byte_t**)field->pData;
+#endif
+    }
+    else
+    {
+        /* Non-pointer field: text plus terminator must fit in data_size */
+        if (alloc_size > field->data_size)
+            PB_RETURN_ERROR(stream, "string overflow");
+    }
+    
+    /* The terminator is written before the payload is read */
+    dest[size] = 0;
+
+    if (!pb_read(stream, dest, (size_t)size))
+        return false;
+
+#ifdef PB_VALIDATE_UTF8
+    if (!pb_validate_utf8((const char*)dest))
+        PB_RETURN_ERROR(stream, "invalid utf8");
+#endif
+
+    return true;
+}
+
+/* Decode a length-delimited submessage field.
+ *
+ * A substream limited to the submessage is opened on 'stream'. For
+ * PB_LTYPE_SUBMSG_W_CB fields an optional message-level callback runs first;
+ * if it leaves no bytes in the substream, the regular decode step is skipped.
+ * Otherwise the contents are decoded with pb_decode_inner(). Once the
+ * descriptor check has passed, the substream is always closed, and a failure
+ * to close it takes precedence over the decode status.
+ */
+static bool checkreturn pb_dec_submessage(pb_istream_t *stream, const pb_field_iter_t *field)
+{
+    bool status = true;
+    bool submsg_consumed = false;
+    pb_istream_t substream;
+
+    if (!pb_make_string_substream(stream, &substream))
+        return false;
+    
+    /* Without a descriptor there is nothing to decode the payload against */
+    if (field->submsg_desc == NULL)
+        PB_RETURN_ERROR(stream, "invalid field descriptor");
+    
+    /* Submessages can have a separate message-level callback that is called
+     * before decoding the message. Typically it is used to set callback fields
+     * inside oneofs. */
+    if (PB_LTYPE(field->type) == PB_LTYPE_SUBMSG_W_CB && field->pSize != NULL)
+    {
+        /* Message callback is stored right before pSize. */
+        pb_callback_t *callback = (pb_callback_t*)field->pSize - 1;
+        if (callback->funcs.decode)
+        {
+            status = callback->funcs.decode(&substream, field, &callback->arg);
+
+            /* The callback read the whole submessage itself */
+            if (substream.bytes_left == 0)
+            {
+                submsg_consumed = true;
+            }
+        }
+    }
+
+    /* Now decode the submessage contents */
+    if (status && !submsg_consumed)
+    {
+        unsigned int flags = 0;
+
+        /* Static required/optional fields are already initialized by top-level
+         * pb_decode(), no need to initialize them again. */
+        if (PB_ATYPE(field->type) == PB_ATYPE_STATIC &&
+            PB_HTYPE(field->type) != PB_HTYPE_REPEATED)
+        {
+            flags = PB_DECODE_NOINIT;
+        }
+
+        status = pb_decode_inner(&substream, field->submsg_desc, field->pData, flags);
+    }
+    
+    /* Closed even when decoding failed; a close failure overrides 'status' */
+    if (!pb_close_string_substream(stream, &substream))
+        return false;
+
+    return status;
+}
+
+/* Decode a 'bytes' field whose storage has a fixed length (field->data_size).
+ *
+ * The encoded length must either match data_size exactly or be zero; a
+ * zero-length value is a special case that clears the whole destination.
+ */
+static bool checkreturn pb_dec_fixed_length_bytes(pb_istream_t *stream, const pb_field_iter_t *field)
+{
+    uint32_t wire_len;
+
+    if (!pb_decode_varint32(stream, &wire_len))
+    {
+        return false;
+    }
+
+    if (wire_len > PB_SIZE_MAX)
+    {
+        PB_RETURN_ERROR(stream, "bytes overflow");
+    }
+
+    if (wire_len != 0)
+    {
+        /* Non-empty payload: only an exact length match is acceptable */
+        if (wire_len != field->data_size)
+        {
+            PB_RETURN_ERROR(stream, "incorrect fixed length bytes size");
+        }
+
+        return pb_read(stream, (pb_byte_t*)field->pData, (size_t)field->data_size);
+    }
+
+    /* Empty bytes string stands for an all-zero fixed-length value */
+    memset(field->pData, 0, (size_t)field->data_size);
+    return true;
+}
+
+#ifdef PB_CONVERT_DOUBLE_FLOAT
+/* Read a 64-bit IEEE 754 double from the stream and store it in *dest
+ * converted to a 32-bit float, using integer operations only.
+ *
+ * The double is split into sign, unbiased exponent and the 24 highest
+ * mantissa bits (one more than a float keeps, used for rounding):
+ *  - exponent all ones (Inf/NaN): stays Inf/NaN; a NaN always stays a NaN
+ *  - exponent above the float range: becomes infinity
+ *  - exponent below the float denormal range: becomes zero
+ *  - exponent in the float denormal range: implicit leading one is made
+ *    explicit and the mantissa is shifted into denormal position
+ * Finite values are rounded on the extra mantissa bit.
+ *
+ * Returns false if the 8 input bytes cannot be read, true otherwise.
+ */
+bool pb_decode_double_as_float(pb_istream_t *stream, float *dest)
+{
+    uint_least8_t sign;
+    int exponent;
+    uint32_t mantissa;
+    uint64_t value;
+    union { float f; uint32_t i; } out;
+
+    if (!pb_decode_fixed64(stream, &value))
+        return false;
+
+    /* Decompose input value */
+    sign = (uint_least8_t)((value >> 63) & 1);
+    exponent = (int)((value >> 52) & 0x7FF) - 1023;
+    mantissa = (uint32_t)((value >> 28) & 0xFFFFFF); /* Highest 24 bits */
+
+    /* Figure if value is in range representable by floats. */
+    if (exponent == 1024)
+    {
+        /* Special value (infinity or NaN): keep the top 23 mantissa bits */
+        exponent = 128;
+        mantissa >>= 1;
+
+        /* A NaN whose payload lies entirely in the discarded low mantissa
+         * bits would otherwise end up with a zero mantissa, i.e. infinity.
+         * Force the quiet-NaN bit so that NaN stays NaN. */
+        if (mantissa == 0 && (value & (((uint64_t)1 << 52) - 1)) != 0)
+        {
+            mantissa = 0x400000;
+        }
+    }
+    else
+    {
+        if (exponent > 127)
+        {
+            /* Too large, convert to infinity */
+            exponent = 128;
+            mantissa = 0;
+        }
+        else if (exponent < -150)
+        {
+            /* Too small, convert to zero */
+            exponent = -127;
+            mantissa = 0;
+        }
+        else if (exponent < -126)
+        {
+            /* Denormalized: add the implicit one, shift into position */
+            mantissa |= 0x1000000;
+            mantissa >>= (-126 - exponent);
+            exponent = -127;
+        }
+
+        /* Round off mantissa (drops the extra 24th bit) */
+        mantissa = (mantissa + 1) >> 1;
+
+        /* Check if mantissa went over 2.0 */
+        if (mantissa & 0x800000)
+        {
+            exponent += 1;
+            mantissa &= 0x7FFFFF;
+            mantissa >>= 1;
+        }
+    }
+
+    /* Combine fields: 23-bit mantissa, biased 8-bit exponent, sign bit */
+    out.i = mantissa;
+    out.i |= (uint32_t)(exponent + 127) << 23;
+    out.i |= (uint32_t)sign << 31;
+
+    *dest = out.f;
+    return true;
+}
+#endif

diff --git a/security/container/protos/nanopb/pb_decode.h b/security/container/protos/nanopb/pb_decode.h
new file mode 100644
index 0000000..824acd4
--- /dev/null
+++ b/security/container/protos/nanopb/pb_decode.h

@@ -0,0 +1,199 @@
+/* pb_decode.h: Functions to decode protocol buffers. Depends on pb_decode.c.
+ * The main function is pb_decode. You also need an input stream, and the
+ * field descriptions created by nanopb_generator.py.
+ */
+
+#ifndef PB_DECODE_H_INCLUDED
+#define PB_DECODE_H_INCLUDED
+
+#include "pb.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Structure for defining custom input streams. You will need to provide
+ * a callback function to read the bytes from your storage, which can be
+ * for example a file or a network socket.
+ * 
+ * The callback must conform to these rules:
+ *
+ * 1) Return false on IO errors. This will cause decoding to abort.
+ * 2) You can use state to store your own data (e.g. buffer pointer),
+ *    and rely on pb_read to verify that nobody reads past bytes_left.
+ * 3) Your callback may be used with substreams, in which case bytes_left
+ *    differs from that of the main stream. Don't use bytes_left to compute
+ *    any pointers.
+ */
+/* Input stream state shared by all decoding functions. */
+struct pb_istream_s
+{
+#ifdef PB_BUFFER_ONLY
+    /* Callback pointer is not used in buffer-only configuration.
+     * Having an int pointer here allows binary compatibility but
+     * gives an error if someone tries to assign callback function.
+     */
+    int *callback;
+#else
+    /* Read callback: receives the stream, a destination buffer and a byte
+     * count; must return false on IO errors, which aborts decoding. */
+    bool (*callback)(pb_istream_t *stream, pb_byte_t *buf, size_t count);
+#endif
+
+    void *state; /* Free field for use by callback implementation */
+    size_t bytes_left; /* Read limit of this stream; enforced by pb_read */
+    
+#ifndef PB_NO_ERRMSG
+    /* Only present when PB_NO_ERRMSG is not defined.
+     * NOTE(review): presumably the message of the first decoding error. */
+    const char *errmsg;
+#endif
+};
+
+/* Initializer that zeroes every member of pb_istream_t. The struct has one
+ * member fewer (errmsg) when PB_NO_ERRMSG is defined, hence the two forms. */
+#ifndef PB_NO_ERRMSG
+#define PB_ISTREAM_EMPTY {0,0,0,0}
+#else
+#define PB_ISTREAM_EMPTY {0,0,0}
+#endif
+
+/***************************
+ * Main decoding functions *
+ ***************************/
+ 
+/* Decode a single protocol buffers message from input stream into a C structure.
+ * Returns true on success, false on any failure.
+ * The actual struct pointed to by dest must match the description in fields.
+ * Callback fields of the destination structure must be initialized by caller.
+ * All other fields will be initialized by this function.
+ *
+ * Example usage:
+ *    MyMessage msg = {};
+ *    uint8_t buffer[64];
+ *    pb_istream_t stream;
+ *    
+ *    // ... read some data into buffer ...
+ *
+ *    stream = pb_istream_from_buffer(buffer, count);
+ *    pb_decode(&stream, MyMessage_fields, &msg);
+ */
+bool pb_decode(pb_istream_t *stream, const pb_msgdesc_t *fields, void *dest_struct);
+
+/* Extended version of pb_decode, with several options to control
+ * the decoding process:
+ *
+ * PB_DECODE_NOINIT:         Do not initialize the fields to default values.
+ *                           This is slightly faster if you do not need the default
+ *                           values and instead initialize the structure to 0 using
+ *                           e.g. memset(). This can also be used for merging two
+ *                           messages, i.e. combine already existing data with new
+ *                           values.
+ *
+ * PB_DECODE_DELIMITED:      Input message starts with the message size as varint.
+ *                           Corresponds to parseDelimitedFrom() in Google's
+ *                           protobuf API.
+ *
+ * PB_DECODE_NULLTERMINATED: Stop reading when field tag is read as 0. This allows
+ *                           reading null terminated messages.
+ *                           NOTE: Until nanopb-0.4.0, pb_decode() also allowed
+ *                           null-termination. This behaviour is not supported in
+ *                           most other protobuf implementations, so PB_DECODE_DELIMITED
+ *                           is a better option for compatibility.
+ *
+ * Multiple flags can be combined with bitwise or (| operator)
+ */
+/* Flag bits accepted by pb_decode_ex(); each uses its own bit so that they
+ * can be OR-ed together. */
+#define PB_DECODE_NOINIT          0x01U
+#define PB_DECODE_DELIMITED       0x02U
+#define PB_DECODE_NULLTERMINATED  0x04U
+/* Same arguments as pb_decode() plus a combination of the flags above. */
+bool pb_decode_ex(pb_istream_t *stream, const pb_msgdesc_t *fields, void *dest_struct, unsigned int flags);
+
+/* Defines for backwards compatibility with code written before nanopb-0.4.0.
+ * Each one forwards to pb_decode_ex() with the matching flag(s) set. */
+#define pb_decode_noinit(s,f,d) pb_decode_ex(s,f,d, PB_DECODE_NOINIT)
+#define pb_decode_delimited(s,f,d) pb_decode_ex(s,f,d, PB_DECODE_DELIMITED)
+#define pb_decode_delimited_noinit(s,f,d) pb_decode_ex(s,f,d, PB_DECODE_DELIMITED | PB_DECODE_NOINIT)
+#define pb_decode_nullterminated(s,f,d) pb_decode_ex(s,f,d, PB_DECODE_NULLTERMINATED)
+
+#ifdef PB_ENABLE_MALLOC
+/* Release any allocated pointer fields. If you use dynamic allocation, you should
+ * call this for any successfully decoded message when you are done with it. If
+ * pb_decode() returns with an error, the message is already released.
+ */
+void pb_release(const pb_msgdesc_t *fields, void *dest_struct);
+#else
+/* Allocation is not supported, so release is a no-op that only marks its
+ * arguments as used. The expansion is wrapped in do { } while (0) so that
+ * 'pb_release(f, d);' is exactly one statement; the previous two-statement
+ * form broke when used as the body of an unbraced if/else.
+ */
+#define pb_release(fields, dest_struct) do { PB_UNUSED(fields); PB_UNUSED(dest_struct); } while (0)
+#endif
+
+
+/**************************************
+ * Functions for manipulating streams *
+ **************************************/
+
+/* Create an input stream for reading from a memory buffer.
+ *
+ * msglen should be the actual length of the message, not the full size of
+ * allocated buffer.
+ *
+ * Alternatively, you can use a custom stream that reads directly from e.g.
+ * a file or a network socket.
+ */
+pb_istream_t pb_istream_from_buffer(const pb_byte_t *buf, size_t msglen);
+
+/* Function to read from a pb_istream_t. You can use this if you need to
+ * read some custom header data, or to read data in field callbacks.
+ */
+bool pb_read(pb_istream_t *stream, pb_byte_t *buf, size_t count);
+
+
+/************************************************
+ * Helper functions for writing field callbacks *
+ ************************************************/
+
+/* Decode the tag for the next field in the stream. Gives the wire type and
+ * field tag. At end of the message, returns false and sets eof to true. */
+bool pb_decode_tag(pb_istream_t *stream, pb_wire_type_t *wire_type, uint32_t *tag, bool *eof);
+
+/* Skip the field payload data, given the wire type. */
+bool pb_skip_field(pb_istream_t *stream, pb_wire_type_t wire_type);
+
+/* Decode an integer in the varint format. This works for enum, int32,
+ * int64, uint32 and uint64 field types. */
+#ifndef PB_WITHOUT_64BIT
+bool pb_decode_varint(pb_istream_t *stream, uint64_t *dest);
+#else
+#define pb_decode_varint pb_decode_varint32
+#endif
+
+/* Decode an integer in the varint format. This works for enum, int32,
+ * and uint32 field types. */
+bool pb_decode_varint32(pb_istream_t *stream, uint32_t *dest);
+
+/* Decode a bool value in varint format. */
+bool pb_decode_bool(pb_istream_t *stream, bool *dest);
+
+/* Decode an integer in the zig-zagged svarint format. This works for sint32
+ * and sint64. */
+#ifndef PB_WITHOUT_64BIT
+bool pb_decode_svarint(pb_istream_t *stream, int64_t *dest);
+#else
+bool pb_decode_svarint(pb_istream_t *stream, int32_t *dest);
+#endif
+
+/* Decode a fixed32, sfixed32 or float value. You need to pass a pointer to
+ * a 4-byte wide C variable. */
+bool pb_decode_fixed32(pb_istream_t *stream, void *dest);
+
+#ifndef PB_WITHOUT_64BIT
+/* Decode a fixed64, sfixed64 or double value. You need to pass a pointer to
+ * a 8-byte wide C variable. */
+bool pb_decode_fixed64(pb_istream_t *stream, void *dest);
+#endif
+
+#ifdef PB_CONVERT_DOUBLE_FLOAT
+/* Decode a double value into float variable. */
+bool pb_decode_double_as_float(pb_istream_t *stream, float *dest);
+#endif
+
+/* Make a limited-length substream for reading a PB_WT_STRING field. */
+bool pb_make_string_substream(pb_istream_t *stream, pb_istream_t *substream);
+bool pb_close_string_substream(pb_istream_t *stream, pb_istream_t *substream);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif

diff --git a/security/container/protos/nanopb/pb_encode.c b/security/container/protos/nanopb/pb_encode.c
new file mode 100644
index 0000000..de716f7
--- /dev/null
+++ b/security/container/protos/nanopb/pb_encode.c

@@ -0,0 +1,987 @@
+/* pb_encode.c -- encode a protobuf using minimal resources
+ *
+ * 2011 Petteri Aimonen <jpa@kapsi.fi>
+ */
+
+#include "pb.h"
+#include "pb_encode.h"
+#include "pb_common.h"
+
+/* Use the GCC warn_unused_result attribute to check that all return values
+ * are propagated correctly. On other compilers and gcc before 3.4.0 just
+ * ignore the annotation.
+ */
+#if !defined(__GNUC__) || ( __GNUC__ < 3) || (__GNUC__ == 3 && __GNUC_MINOR__ < 4)
+    /* No GCC-compatible compiler, or GCC older than 3.4: expands to nothing */
+    #define checkreturn
+#else
+    /* Ask the compiler to warn when a caller ignores the return value */
+    #define checkreturn __attribute__((warn_unused_result))
+#endif
+
+/**************************************
+ * Declarations internal to this file *
+ **************************************/
+static bool checkreturn buf_write(pb_ostream_t *stream, const pb_byte_t *buf, size_t count);
+static bool checkreturn encode_array(pb_ostream_t *stream, pb_field_iter_t *field);
+static bool checkreturn pb_check_proto3_default_value(const pb_field_iter_t *field);
+static bool checkreturn encode_basic_field(pb_ostream_t *stream, const pb_field_iter_t *field);
+static bool checkreturn encode_callback_field(pb_ostream_t *stream, const pb_field_iter_t *field);
+static bool checkreturn encode_field(pb_ostream_t *stream, pb_field_iter_t *field);
+static bool checkreturn encode_extension_field(pb_ostream_t *stream, const pb_field_iter_t *field);
+static bool checkreturn default_extension_encoder(pb_ostream_t *stream, const pb_extension_t *extension);
+static bool checkreturn pb_encode_varint_32(pb_ostream_t *stream, uint32_t low, uint32_t high);
+static bool checkreturn pb_enc_bool(pb_ostream_t *stream, const pb_field_iter_t *field);
+static bool checkreturn pb_enc_varint(pb_ostream_t *stream, const pb_field_iter_t *field);
+static bool checkreturn pb_enc_fixed(pb_ostream_t *stream, const pb_field_iter_t *field);
+static bool checkreturn pb_enc_bytes(pb_ostream_t *stream, const pb_field_iter_t *field);
+static bool checkreturn pb_enc_string(pb_ostream_t *stream, const pb_field_iter_t *field);
+static bool checkreturn pb_enc_submessage(pb_ostream_t *stream, const pb_field_iter_t *field);
+static bool checkreturn pb_enc_fixed_length_bytes(pb_ostream_t *stream, const pb_field_iter_t *field);
+
+/* Widest integer types used by the encoder in this file: 32 bits wide when
+ * PB_WITHOUT_64BIT is defined, 64 bits wide otherwise. */
+#ifdef PB_WITHOUT_64BIT
+#define pb_int64_t int32_t
+#define pb_uint64_t uint32_t
+#else
+#define pb_int64_t int64_t
+#define pb_uint64_t uint64_t
+#endif
+
+/*******************************
+ * pb_ostream_t implementation *
+ *******************************/
+
+/* Stream callback for memory buffers: copies 'count' bytes from 'buf' to the
+ * position stored in stream->state and moves that position forward. Performs
+ * no bounds checking of its own and always reports success. */
+static bool checkreturn buf_write(pb_ostream_t *stream, const pb_byte_t *buf, size_t count)
+{
+    pb_byte_t *out = (pb_byte_t*)stream->state;
+    const pb_byte_t *in = buf;
+    size_t remaining = count;
+
+    while (remaining > 0)
+    {
+        *out++ = *in++;
+        remaining--;
+    }
+
+    /* 'out' now points just past the last byte written */
+    stream->state = out;
+    return true;
+}
+
+/* Build an output stream that writes into the memory buffer 'buf', accepting
+ * at most 'bufsize' bytes. The stream starts with zero bytes written and, if
+ * error messages are compiled in, with no error message set. */
+pb_ostream_t pb_ostream_from_buffer(pb_byte_t *buf, size_t bufsize)
+{
+    pb_ostream_t out;
+
+    out.state = buf;
+    out.max_size = bufsize;
+    out.bytes_written = 0;
+#ifndef PB_NO_ERRMSG
+    out.errmsg = NULL;
+#endif
+
+#ifdef PB_BUFFER_ONLY
+    out.callback = (void*)1; /* Just a marker value */
+#else
+    out.callback = &buf_write;
+#endif
+
+    return out;
+}
+
+/* Write 'count' bytes from 'buf' to the stream.
+ *
+ * When the stream has no callback, or count is zero, nothing is written and
+ * only bytes_written is increased. Otherwise the write is refused with
+ * "stream full" if it would wrap the byte counter or exceed max_size, and
+ * with "io error" if the underlying write reports failure.
+ */
+bool checkreturn pb_write(pb_ostream_t *stream, const pb_byte_t *buf, size_t count)
+{
+    if (count > 0 && stream->callback != NULL)
+    {
+        /* Counter would wrap around */
+        if (stream->bytes_written + count < stream->bytes_written)
+        {
+            PB_RETURN_ERROR(stream, "stream full");
+        }
+
+        /* Stream capacity would be exceeded */
+        if (stream->bytes_written + count > stream->max_size)
+        {
+            PB_RETURN_ERROR(stream, "stream full");
+        }
+
+#ifdef PB_BUFFER_ONLY
+        if (!buf_write(stream, buf, count))
+        {
+            PB_RETURN_ERROR(stream, "io error");
+        }
+#else
+        if (!stream->callback(stream, buf, count))
+        {
+            PB_RETURN_ERROR(stream, "io error");
+        }
+#endif
+    }
+
+    stream->bytes_written += count;
+    return true;
+}
+
+/*************************
+ * Encode a single field *
+ *************************/
+
+/* Interpret the bytes of a bool object without reading it as a bool, so that
+ * an invalid stored value cannot cause undefined behavior: the result is true
+ * if any of the sizeof(bool) bytes at pSize is non-zero. See issue #434 and
+ * https://stackoverflow.com/questions/27661768/weird-results-for-conditional
+ */
+static bool safe_read_bool(const void *pSize)
+{
+    const char *cursor = (const char *)pSize;
+    const char *end = cursor + sizeof(bool);
+
+    while (cursor != end)
+    {
+        if (*cursor++ != 0)
+        {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/* Encode a repeated (array) field. Handles the size calculations and
+ * possible packing.
+ *
+ * The element count is read through field->pSize; an empty array encodes
+ * nothing. Unless PB_ENCODE_ARRAYS_UNPACKED is defined, packable element
+ * types are written as one length-delimited record (tag, total byte size,
+ * then the elements back to back). All other types are written one tagged
+ * element at a time.
+ *
+ * Note: field->pData is advanced element by element while data is written
+ * and is not restored afterwards.
+ */
+static bool checkreturn encode_array(pb_ostream_t *stream, pb_field_iter_t *field)
+{
+    pb_size_t i;
+    pb_size_t count;
+#ifndef PB_ENCODE_ARRAYS_UNPACKED
+    size_t size;
+#endif
+
+    count = *(pb_size_t*)field->pSize;
+
+    if (count == 0)
+        return true;
+
+    /* The capacity limit is only checked for non-pointer arrays */
+    if (PB_ATYPE(field->type) != PB_ATYPE_POINTER && count > field->array_size)
+        PB_RETURN_ERROR(stream, "array max size exceeded");
+    
+#ifndef PB_ENCODE_ARRAYS_UNPACKED
+    /* We always pack arrays if the datatype allows it. */
+    if (PB_LTYPE(field->type) <= PB_LTYPE_LAST_PACKABLE)
+    {
+        if (!pb_encode_tag(stream, PB_WT_STRING, field->tag))
+            return false;
+        
+        /* Determine the total size of packed array. */
+        if (PB_LTYPE(field->type) == PB_LTYPE_FIXED32)
+        {
+            size = 4 * (size_t)count;
+        }
+        else if (PB_LTYPE(field->type) == PB_LTYPE_FIXED64)
+        {
+            size = 8 * (size_t)count;
+        }
+        else
+        { 
+            /* Variable-length elements: encode every element into a sizing
+             * stream first, then rewind pData for the real pass below. */
+            pb_ostream_t sizestream = PB_OSTREAM_SIZING;
+            void *pData_orig = field->pData;
+            for (i = 0; i < count; i++)
+            {
+                if (!pb_enc_varint(&sizestream, field))
+                    PB_RETURN_ERROR(stream, PB_GET_ERROR(&sizestream));
+                field->pData = (char*)field->pData + field->data_size;
+            }
+            field->pData = pData_orig;
+            size = sizestream.bytes_written;
+        }
+        
+        if (!pb_encode_varint(stream, (pb_uint64_t)size))
+            return false;
+        
+        /* A stream without callback only counts bytes, so the payload size
+         * is added without encoding the elements again. */
+        if (stream->callback == NULL)
+            return pb_write(stream, NULL, size); /* Just sizing.. */
+        
+        /* Write the data */
+        for (i = 0; i < count; i++)
+        {
+            if (PB_LTYPE(field->type) == PB_LTYPE_FIXED32 || PB_LTYPE(field->type) == PB_LTYPE_FIXED64)
+            {
+                if (!pb_enc_fixed(stream, field))
+                    return false;
+            }
+            else
+            {
+                if (!pb_enc_varint(stream, field))
+                    return false;
+            }
+
+            /* Step to the next array element */
+            field->pData = (char*)field->pData + field->data_size;
+        }
+    }
+    else /* Unpacked fields */
+#endif
+    {
+        for (i = 0; i < count; i++)
+        {
+            /* Normally the data is stored directly in the array entries, but
+             * for pointer-type string and bytes fields, the array entries are
+             * actually pointers themselves also. So we have to dereference once
+             * more to get to the actual data. */
+            if (PB_ATYPE(field->type) == PB_ATYPE_POINTER &&
+                (PB_LTYPE(field->type) == PB_LTYPE_STRING ||
+                 PB_LTYPE(field->type) == PB_LTYPE_BYTES))
+            {
+                bool status;
+                void *pData_orig = field->pData;
+                field->pData = *(void* const*)field->pData;
+
+                if (!field->pData)
+                {
+                    /* Null pointer in array is treated as empty string / bytes */
+                    status = pb_encode_tag_for_field(stream, field) &&
+                             pb_encode_varint(stream, 0);
+                }
+                else
+                {
+                    status = encode_basic_field(stream, field);
+                }
+
+                /* Point back at the array entry before stepping to the next */
+                field->pData = pData_orig;
+
+                if (!status)
+                    return false;
+            }
+            else
+            {
+                if (!encode_basic_field(stream, field))
+                    return false;
+            }
+            field->pData = (char*)field->pData + field->data_size;
+        }
+    }
+    
+    return true;
+}
+
+/* In proto3, all fields are optional and are only encoded if their value is "non-zero".
+ * This function implements the check for the zero value.
+ *
+ * Returns true when the field holds its zero/empty value (the caller then
+ * skips encoding it) and false when the field has to be encoded. The test
+ * applied depends on the allocation type (static, pointer, callback) and, for
+ * static fields, on the field's repetition and data type.
+ */
+static bool checkreturn pb_check_proto3_default_value(const pb_field_iter_t *field)
+{
+    pb_type_t type = field->type;
+
+    if (PB_ATYPE(type) == PB_ATYPE_STATIC)
+    {
+        if (PB_HTYPE(type) == PB_HTYPE_REQUIRED)
+        {
+            /* Required proto2 fields inside proto3 submessage, pretty rare case */
+            return false;
+        }
+        else if (PB_HTYPE(type) == PB_HTYPE_REPEATED)
+        {
+            /* Repeated fields inside proto3 submessage: present if count != 0 */
+            return *(const pb_size_t*)field->pSize == 0;
+        }
+        else if (PB_HTYPE(type) == PB_HTYPE_ONEOF)
+        {
+            /* Oneof fields: the value behind pSize is zero when no member is set */
+            return *(const pb_size_t*)field->pSize == 0;
+        }
+        else if (PB_HTYPE(type) == PB_HTYPE_OPTIONAL && field->pSize != NULL)
+        {
+            /* Proto2 optional fields inside proto3 message, or proto3
+             * submessage fields. */
+            return safe_read_bool(field->pSize) == false;
+        }
+        else if (field->descriptor->default_value)
+        {
+            /* Proto3 messages do not have default values, but proto2 messages
+             * can contain optional fields without has_fields (generator option 'proto3').
+             * In this case they must always be encoded, to make sure that the
+             * non-zero default value is overwritten.
+             */
+            return false;
+        }
+
+        /* Rest is proto3 singular fields */
+        if (PB_LTYPE(type) <= PB_LTYPE_LAST_PACKABLE)
+        {
+            /* Simple integer / float fields: zero only if every byte is zero */
+            pb_size_t i;
+            const char *p = (const char*)field->pData;
+            for (i = 0; i < field->data_size; i++)
+            {
+                if (p[i] != 0)
+                {
+                    return false;
+                }
+            }
+
+            return true;
+        }
+        else if (PB_LTYPE(type) == PB_LTYPE_BYTES)
+        {
+            /* Bytes: empty when the stored length is zero */
+            const pb_bytes_array_t *bytes = (const pb_bytes_array_t*)field->pData;
+            return bytes->size == 0;
+        }
+        else if (PB_LTYPE(type) == PB_LTYPE_STRING)
+        {
+            /* String: empty when the first character is the terminator */
+            return *(const char*)field->pData == '\0';
+        }
+        else if (PB_LTYPE(type) == PB_LTYPE_FIXED_LENGTH_BYTES)
+        {
+            /* Fixed length bytes is only empty if its length is fixed
+             * as 0. Which would be pretty strange, but we can check
+             * it anyway. */
+            return field->data_size == 0;
+        }
+        else if (PB_LTYPE_IS_SUBMSG(type))
+        {
+            /* Check all fields in the submessage to find if any of them
+             * are non-zero. The comparison cannot be done byte-per-byte
+             * because the C struct may contain padding bytes that must
+             * be skipped. Note that usually proto3 submessages have
+             * a separate has_field that is checked earlier in this if.
+             */
+            pb_field_iter_t iter;
+            if (pb_field_iter_begin(&iter, field->submsg_desc, field->pData))
+            {
+                do
+                {
+                    if (!pb_check_proto3_default_value(&iter))
+                    {
+                        return false;
+                    }
+                } while (pb_field_iter_next(&iter));
+            }
+            return true;
+        }
+    }
+    else if (PB_ATYPE(type) == PB_ATYPE_POINTER)
+    {
+        /* Pointer fields: a NULL data pointer counts as default */
+        return field->pData == NULL;
+    }
+    else if (PB_ATYPE(type) == PB_ATYPE_CALLBACK)
+    {
+        if (PB_LTYPE(type) == PB_LTYPE_EXTENSION)
+        {
+            /* Extension placeholder: default when the extension list is empty */
+            const pb_extension_t *extension = *(const pb_extension_t* const *)field->pData;
+            return extension == NULL;
+        }
+        else if (field->descriptor->field_callback == pb_default_field_callback)
+        {
+            /* Default callback handler: default when no encode function is set */
+            pb_callback_t *pCallback = (pb_callback_t*)field->pData;
+            return pCallback->funcs.encode == NULL;
+        }
+        else
+        {
+            /* Other handler: default only when the descriptor has no callback */
+            return field->descriptor->field_callback == NULL;
+        }
+    }
+
+    return false; /* Not typically reached, safe default for weird special cases. */
+}
+
+/* Write one value whose storage the encoder can read directly (static or
+ * pointer allocation). Emits the field tag and then hands the payload to the
+ * encoder matching the field's data type. A NULL data pointer means an unset
+ * pointer field: nothing is written and the call succeeds. */
+static bool checkreturn encode_basic_field(pb_ostream_t *stream, const pb_field_iter_t *field)
+{
+    pb_type_t type;
+
+    if (field->pData == NULL)
+    {
+        /* Missing pointer field */
+        return true;
+    }
+
+    if (!pb_encode_tag_for_field(stream, field))
+    {
+        return false;
+    }
+
+    type = field->type;
+
+    if (PB_LTYPE(type) == PB_LTYPE_BOOL)
+    {
+        return pb_enc_bool(stream, field);
+    }
+    else if (PB_LTYPE(type) == PB_LTYPE_VARINT ||
+             PB_LTYPE(type) == PB_LTYPE_UVARINT ||
+             PB_LTYPE(type) == PB_LTYPE_SVARINT)
+    {
+        return pb_enc_varint(stream, field);
+    }
+    else if (PB_LTYPE(type) == PB_LTYPE_FIXED32 ||
+             PB_LTYPE(type) == PB_LTYPE_FIXED64)
+    {
+        return pb_enc_fixed(stream, field);
+    }
+    else if (PB_LTYPE(type) == PB_LTYPE_BYTES)
+    {
+        return pb_enc_bytes(stream, field);
+    }
+    else if (PB_LTYPE(type) == PB_LTYPE_STRING)
+    {
+        return pb_enc_string(stream, field);
+    }
+    else if (PB_LTYPE(type) == PB_LTYPE_SUBMESSAGE ||
+             PB_LTYPE(type) == PB_LTYPE_SUBMSG_W_CB)
+    {
+        return pb_enc_submessage(stream, field);
+    }
+    else if (PB_LTYPE(type) == PB_LTYPE_FIXED_LENGTH_BYTES)
+    {
+        return pb_enc_fixed_length_bytes(stream, field);
+    }
+    else
+    {
+        PB_RETURN_ERROR(stream, "invalid field type");
+    }
+}
+
+/* Encode a callback-type field: the descriptor's field callback, if there is
+ * one, is invoked to provide and encode the data. A missing callback encodes
+ * nothing; a callback that reports failure yields "callback error". */
+static bool checkreturn encode_callback_field(pb_ostream_t *stream, const pb_field_iter_t *field)
+{
+    if (field->descriptor->field_callback == NULL)
+    {
+        return true;
+    }
+
+    /* First argument is NULL here; the output stream is passed as second */
+    if (field->descriptor->field_callback(NULL, stream, field))
+    {
+        return true;
+    }
+
+    PB_RETURN_ERROR(stream, "callback error");
+}
+
+/* Encode one field of any allocation type (callback, pointer or static).
+ *
+ * First decides whether the field is present at all: a oneof member must be
+ * the currently selected one, an optional field must have its presence flag
+ * set, and a static optional field without a flag (proto3 singular) must
+ * differ from its zero value. Absent fields encode nothing. A NULL data
+ * pointer is an error only for required fields. Present fields are passed on
+ * to the callback, array or basic encoder.
+ */
+static bool checkreturn encode_field(pb_ostream_t *stream, pb_field_iter_t *field)
+{
+    bool absent = false;
+
+    /* Check field presence */
+    if (PB_HTYPE(field->type) == PB_HTYPE_ONEOF)
+    {
+        /* Different type oneof field */
+        absent = (*(const pb_size_t*)field->pSize != field->tag);
+    }
+    else if (PB_HTYPE(field->type) == PB_HTYPE_OPTIONAL)
+    {
+        if (field->pSize != NULL)
+        {
+            /* Missing optional field */
+            absent = !safe_read_bool(field->pSize);
+        }
+        else if (PB_ATYPE(field->type) == PB_ATYPE_STATIC)
+        {
+            /* Proto3 singular field */
+            absent = pb_check_proto3_default_value(field);
+        }
+    }
+
+    if (absent)
+    {
+        return true;
+    }
+
+    if (field->pData == NULL)
+    {
+        if (PB_HTYPE(field->type) == PB_HTYPE_REQUIRED)
+        {
+            PB_RETURN_ERROR(stream, "missing required field");
+        }
+
+        /* Pointer field set to NULL */
+        return true;
+    }
+
+    /* Then encode field contents */
+    if (PB_ATYPE(field->type) == PB_ATYPE_CALLBACK)
+    {
+        return encode_callback_field(stream, field);
+    }
+
+    if (PB_HTYPE(field->type) == PB_HTYPE_REPEATED)
+    {
+        return encode_array(stream, field);
+    }
+
+    return encode_basic_field(stream, field);
+}
+
+/* Fallback encoder for extension fields that do not provide their own encode
+ * function. Expects to have a pb_msgdesc_t pointer in the
+ * extension->type->arg field, pointing to a message with only one field in
+ * it; that field is then encoded like any other. */
+static bool checkreturn default_extension_encoder(pb_ostream_t *stream, const pb_extension_t *extension)
+{
+    pb_field_iter_t ext_field;
+
+    if (pb_field_iter_begin_extension_const(&ext_field, extension))
+    {
+        return encode_field(stream, &ext_field);
+    }
+
+    PB_RETURN_ERROR(stream, "invalid extension");
+}
+
+
+/* Encode every extension in the linked list that the extension placeholder
+ * field points to. Each extension uses its own encode function when it has
+ * one and the default encoder otherwise; the first failure stops the walk. */
+static bool checkreturn encode_extension_field(pb_ostream_t *stream, const pb_field_iter_t *field)
+{
+    const pb_extension_t *ext;
+
+    for (ext = *(const pb_extension_t* const *)field->pData; ext != NULL; ext = ext->next)
+    {
+        bool ok;
+
+        if (ext->type->encode)
+        {
+            ok = ext->type->encode(stream, ext);
+        }
+        else
+        {
+            ok = default_extension_encoder(stream, ext);
+        }
+
+        if (!ok)
+        {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/*********************
+ * Encode all fields *
+ *********************/
+
+/* Encode the structure 'src_struct', described by 'fields', to the stream.
+ * Fields are visited in iterator order; the extension placeholder is handled
+ * by encode_extension_field() and every other field by encode_field(). A
+ * message type without fields encodes nothing and succeeds. Stops and
+ * returns false at the first field that fails to encode. */
+bool checkreturn pb_encode(pb_ostream_t *stream, const pb_msgdesc_t *fields, const void *src_struct)
+{
+    pb_field_iter_t iter;
+    bool have_field = pb_field_iter_begin_const(&iter, fields, src_struct);
+
+    while (have_field)
+    {
+        bool ok;
+
+        if (PB_LTYPE(iter.type) == PB_LTYPE_EXTENSION)
+        {
+            /* Special case for the extension field placeholder */
+            ok = encode_extension_field(stream, &iter);
+        }
+        else
+        {
+            /* Regular field */
+            ok = encode_field(stream, &iter);
+        }
+
+        if (!ok)
+        {
+            return false;
+        }
+
+        have_field = pb_field_iter_next(&iter);
+    }
+
+    return true;
+}
+
+/* Extended version of pb_encode() controlled by 'flags':
+ *  - PB_ENCODE_DELIMITED: encode through pb_encode_submessage(); takes
+ *    precedence when both flags are set.
+ *  - PB_ENCODE_NULLTERMINATED: encode the message, then append one zero byte.
+ *  - neither: identical to pb_encode().
+ */
+bool checkreturn pb_encode_ex(pb_ostream_t *stream, const pb_msgdesc_t *fields, const void *src_struct, unsigned int flags)
+{
+    if ((flags & PB_ENCODE_DELIMITED) != 0)
+    {
+        return pb_encode_submessage(stream, fields, src_struct);
+    }
+
+    if (!pb_encode(stream, fields, src_struct))
+    {
+        return false;
+    }
+
+    if ((flags & PB_ENCODE_NULLTERMINATED) != 0)
+    {
+        const pb_byte_t zero = 0;
+        return pb_write(stream, &zero, 1);
+    }
+
+    return true;
+}
+
+/* Compute the encoded size of a message by encoding it into a sizing stream.
+ * On success the byte count is stored in *size and true is returned; on
+ * failure *size is left untouched and false is returned. */
+bool pb_get_encoded_size(size_t *size, const pb_msgdesc_t *fields, const void *src_struct)
+{
+    pb_ostream_t sizing = PB_OSTREAM_SIZING;
+
+    if (pb_encode(&sizing, fields, src_struct))
+    {
+        *size = sizing.bytes_written;
+        return true;
+    }
+
+    return false;
+}
+
+/********************
+ * Helper functions *
+ ********************/
+
+/* This function avoids 64-bit shifts as they are quite slow on many platforms.
+ *
+ * Encodes the 64-bit quantity formed by 'high' (upper 32 bits) and 'low'
+ * (lower 32 bits) as a varint: seven payload bits per output byte, least
+ * significant group first, with bit 0x80 set on every byte except the last.
+ * The bytes are collected in a local buffer and written with one pb_write()
+ * call, whose result is returned.
+ */
+static bool checkreturn pb_encode_varint_32(pb_ostream_t *stream, uint32_t low, uint32_t high)
+{
+    size_t i = 0;
+    pb_byte_t buffer[10]; /* 64 bits at 7 bits per byte need at most 10 bytes */
+    pb_byte_t byte = (pb_byte_t)(low & 0x7F);
+    low >>= 7;
+
+    /* Emit up to four complete groups taken from 'low' (bits 0..27). The
+     * loop keeps going while 'high' is non-zero, so i == 4 whenever the
+     * 'high' branch below is entered. */
+    while (i < 4 && (low != 0 || high != 0))
+    {
+        byte |= 0x80;
+        buffer[i++] = byte;
+        byte = (pb_byte_t)(low & 0x7F);
+        low >>= 7;
+    }
+
+    if (high)
+    {
+        /* 'byte' holds the top 4 bits of 'low' at this point; its three
+         * remaining payload bits are filled with the lowest 3 bits of
+         * 'high'. */
+        byte = (pb_byte_t)(byte | ((high & 0x07) << 4));
+        high >>= 3;
+
+        /* Emit the remaining bits of 'high' in 7-bit groups. */
+        while (high)
+        {
+            byte |= 0x80;
+            buffer[i++] = byte;
+            byte = (pb_byte_t)(high & 0x7F);
+            high >>= 7;
+        }
+    }
+
+    /* The final byte is stored without the continuation bit. */
+    buffer[i++] = byte;
+
+    return pb_write(stream, buffer, i);
+}
+
+/* Write 'value' to the stream in varint format. */
+bool checkreturn pb_encode_varint(pb_ostream_t *stream, pb_uint64_t value)
+{
+    pb_byte_t single;
+
+    if (value > 0x7F)
+    {
+        /* More than seven significant bits: use the multi-byte encoder. */
+#ifdef PB_WITHOUT_64BIT
+        return pb_encode_varint_32(stream, value, 0);
+#else
+        return pb_encode_varint_32(stream, (uint32_t)value, (uint32_t)(value >> 32));
+#endif
+    }
+
+    /* Fast path: values 0..0x7F are written as a single byte. */
+    single = (pb_byte_t)value;
+    return pb_write(stream, &single, 1);
+}
+
+/* Write a signed value as a zig-zag varint: the value is shifted left by
+ * one bit and, for negative inputs, all bits are then inverted. */
+bool checkreturn pb_encode_svarint(pb_ostream_t *stream, pb_int64_t value)
+{
+    pb_uint64_t shifted = (pb_uint64_t)value << 1;
+    pb_uint64_t zigzagged = (value < 0) ? ~shifted : shifted;
+
+    return pb_encode_varint(stream, zigzagged);
+}
+
+/* Write the 32-bit value that 'value' points to as four bytes, least
+ * significant byte first, regardless of host byte order. */
+bool checkreturn pb_encode_fixed32(pb_ostream_t *stream, const void *value)
+{
+    uint32_t remaining = *(const uint32_t*)value;
+    pb_byte_t le[4];
+    size_t idx;
+
+    /* Peel off the lowest byte on each round. */
+    for (idx = 0; idx < 4; idx++)
+    {
+        le[idx] = (pb_byte_t)(remaining & 0xFF);
+        remaining >>= 8;
+    }
+
+    return pb_write(stream, le, 4);
+}
+
+#ifndef PB_WITHOUT_64BIT
+/* Write the 64-bit value that 'value' points to as eight bytes, least
+ * significant byte first, regardless of host byte order. */
+bool checkreturn pb_encode_fixed64(pb_ostream_t *stream, const void *value)
+{
+    uint64_t remaining = *(const uint64_t*)value;
+    pb_byte_t le[8];
+    size_t idx;
+
+    /* Peel off the lowest byte on each round. */
+    for (idx = 0; idx < 8; idx++)
+    {
+        le[idx] = (pb_byte_t)(remaining & 0xFF);
+        remaining >>= 8;
+    }
+
+    return pb_write(stream, le, 8);
+}
+#endif
+
+/* Write a field key: the field number shifted left by three bits, with the
+ * wire type in the low three bits, encoded as a varint. */
+bool checkreturn pb_encode_tag(pb_ostream_t *stream, pb_wire_type_t wiretype, uint32_t field_number)
+{
+    pb_uint64_t key = (pb_uint64_t)field_number << 3;
+
+    key = key | wiretype;
+    return pb_encode_varint(stream, key);
+}
+
+/* Write the key for 'field', choosing the wire type from the field's
+ * logical type. Fails with "invalid field type" for any other type. */
+bool pb_encode_tag_for_field(pb_ostream_t *stream, const pb_field_iter_t *field)
+{
+    switch (PB_LTYPE(field->type))
+    {
+        case PB_LTYPE_BOOL:
+        case PB_LTYPE_VARINT:
+        case PB_LTYPE_UVARINT:
+        case PB_LTYPE_SVARINT:
+            return pb_encode_tag(stream, PB_WT_VARINT, field->tag);
+
+        case PB_LTYPE_FIXED32:
+            return pb_encode_tag(stream, PB_WT_32BIT, field->tag);
+
+        case PB_LTYPE_FIXED64:
+            return pb_encode_tag(stream, PB_WT_64BIT, field->tag);
+
+        /* Everything length-delimited shares the "string" wire type. */
+        case PB_LTYPE_BYTES:
+        case PB_LTYPE_STRING:
+        case PB_LTYPE_SUBMESSAGE:
+        case PB_LTYPE_SUBMSG_W_CB:
+        case PB_LTYPE_FIXED_LENGTH_BYTES:
+            return pb_encode_tag(stream, PB_WT_STRING, field->tag);
+
+        default:
+            break;
+    }
+
+    PB_RETURN_ERROR(stream, "invalid field type");
+}
+
+/* Write a length-delimited payload: 'size' as a varint, followed by 'size'
+ * bytes from 'buffer'. The payload is not written if the length fails. */
+bool checkreturn pb_encode_string(pb_ostream_t *stream, const pb_byte_t *buffer, size_t size)
+{
+    return pb_encode_varint(stream, (pb_uint64_t)size)
+        && pb_write(stream, buffer, size);
+}
+
+/* Encode a message as a length-delimited blob: a varint length followed by
+ * the message body.
+ *
+ * The message is encoded twice: once into a sizing substream to learn its
+ * length, then into a substream that shares the parent's callback and state
+ * and is capped at exactly that length.
+ *
+ * Returns false if either pass fails, if the parent stream cannot hold the
+ * body ("stream full"), or if the second pass produces a different number
+ * of bytes than the first ("submsg size changed").
+ */
+bool checkreturn pb_encode_submessage(pb_ostream_t *stream, const pb_msgdesc_t *fields, const void *src_struct)
+{
+    /* First calculate the message size using a non-writing substream. */
+    pb_ostream_t substream = PB_OSTREAM_SIZING;
+    size_t size;
+    bool status;
+    
+    if (!pb_encode(&substream, fields, src_struct))
+    {
+#ifndef PB_NO_ERRMSG
+        /* Surface the sizing pass's error on the caller's stream. */
+        stream->errmsg = substream.errmsg;
+#endif
+        return false;
+    }
+    
+    size = substream.bytes_written;
+    
+    if (!pb_encode_varint(stream, (pb_uint64_t)size))
+        return false;
+    
+    if (stream->callback == NULL)
+        return pb_write(stream, NULL, size); /* Just sizing */
+    
+    /* Capacity check written with a subtraction so that a very large 'size'
+     * cannot wrap bytes_written + size around and slip past the limit. */
+    if (stream->bytes_written > stream->max_size ||
+        size > stream->max_size - stream->bytes_written)
+        PB_RETURN_ERROR(stream, "stream full");
+        
+    /* Use a substream to verify that a callback doesn't write more than
+     * what it did the first time. */
+    substream.callback = stream->callback;
+    substream.state = stream->state;
+    substream.max_size = size;
+    substream.bytes_written = 0;
+#ifndef PB_NO_ERRMSG
+    substream.errmsg = NULL;
+#endif
+    
+    status = pb_encode(&substream, fields, src_struct);
+    
+    /* Fold the substream's progress back into the parent stream. */
+    stream->bytes_written += substream.bytes_written;
+    stream->state = substream.state;
+#ifndef PB_NO_ERRMSG
+    stream->errmsg = substream.errmsg;
+#endif
+    
+    if (substream.bytes_written != size)
+        PB_RETURN_ERROR(stream, "submsg size changed");
+    
+    return status;
+}
+
+/* Field encoders */
+
+/* Encode a bool field as a varint that is exactly 0 or 1. */
+static bool checkreturn pb_enc_bool(pb_ostream_t *stream, const pb_field_iter_t *field)
+{
+    uint32_t as_int;
+
+    if (safe_read_bool(field->pData))
+        as_int = 1;
+    else
+        as_int = 0;
+
+    PB_UNUSED(field);
+    return pb_encode_varint(stream, as_int);
+}
+
+/* Encode an integer field as a varint. The value is loaded according to
+ * field->data_size and widened (zero-extended for PB_LTYPE_UVARINT,
+ * sign-extended otherwise) before encoding. PB_LTYPE_SVARINT fields use
+ * zig-zag encoding. */
+static bool checkreturn pb_enc_varint(pb_ostream_t *stream, const pb_field_iter_t *field)
+{
+    pb_int64_t sval = 0;
+
+    if (PB_LTYPE(field->type) == PB_LTYPE_UVARINT)
+    {
+        /* Unsigned: widen without sign extension. */
+        pb_uint64_t uval = 0;
+
+        if (field->data_size == sizeof(uint_least8_t))
+            uval = *(const uint_least8_t*)field->pData;
+        else if (field->data_size == sizeof(uint_least16_t))
+            uval = *(const uint_least16_t*)field->pData;
+        else if (field->data_size == sizeof(uint32_t))
+            uval = *(const uint32_t*)field->pData;
+        else if (field->data_size == sizeof(pb_uint64_t))
+            uval = *(const pb_uint64_t*)field->pData;
+        else
+            PB_RETURN_ERROR(stream, "invalid data_size");
+
+        return pb_encode_varint(stream, uval);
+    }
+
+    /* Signed: widen with sign extension. */
+    if (field->data_size == sizeof(int_least8_t))
+        sval = *(const int_least8_t*)field->pData;
+    else if (field->data_size == sizeof(int_least16_t))
+        sval = *(const int_least16_t*)field->pData;
+    else if (field->data_size == sizeof(int32_t))
+        sval = *(const int32_t*)field->pData;
+    else if (field->data_size == sizeof(pb_int64_t))
+        sval = *(const pb_int64_t*)field->pData;
+    else
+        PB_RETURN_ERROR(stream, "invalid data_size");
+
+    if (PB_LTYPE(field->type) == PB_LTYPE_SVARINT)
+        return pb_encode_svarint(stream, sval);
+
+#ifdef PB_WITHOUT_64BIT
+    /* Without 64-bit support, supply the all-ones upper word for a
+     * negative value explicitly. */
+    if (sval < 0)
+        return pb_encode_varint_32(stream, (uint32_t)sval, (uint32_t)-1);
+#endif
+
+    return pb_encode_varint(stream, (pb_uint64_t)sval);
+}
+
+/* Encode a fixed-width field, picking the 32-bit or 64-bit encoder from
+ * field->data_size. Any other size fails with "invalid data_size". */
+static bool checkreturn pb_enc_fixed(pb_ostream_t *stream, const pb_field_iter_t *field)
+{
+#ifdef PB_CONVERT_DOUBLE_FLOAT
+    /* A FIXED64 field stored as a float is widened to a double on the wire. */
+    if (PB_LTYPE(field->type) == PB_LTYPE_FIXED64 && field->data_size == sizeof(float))
+        return pb_encode_float_as_double(stream, *(float*)field->pData);
+#endif
+
+    if (field->data_size == sizeof(uint32_t))
+        return pb_encode_fixed32(stream, field->pData);
+
+#ifndef PB_WITHOUT_64BIT
+    if (field->data_size == sizeof(uint64_t))
+        return pb_encode_fixed64(stream, field->pData);
+#endif
+
+    PB_RETURN_ERROR(stream, "invalid data_size");
+}
+
+/* Encode a bytes field stored as a pb_bytes_array_t. A NULL array is written
+ * as an empty field. For statically allocated fields the stored size must
+ * fit in the space that follows the array header. */
+static bool checkreturn pb_enc_bytes(pb_ostream_t *stream, const pb_field_iter_t *field)
+{
+    const pb_bytes_array_t *arr = (const pb_bytes_array_t*)field->pData;
+
+    /* Treat null pointer as an empty bytes field */
+    if (arr == NULL)
+        return pb_encode_string(stream, NULL, 0);
+
+    if (PB_ATYPE(field->type) == PB_ATYPE_STATIC)
+    {
+        if (arr->size > field->data_size - offsetof(pb_bytes_array_t, bytes))
+        {
+            PB_RETURN_ERROR(stream, "bytes size exceeded");
+        }
+    }
+
+    return pb_encode_string(stream, arr->bytes, (size_t)arr->size);
+}
+
+/* Encode a string field as a length-delimited payload.
+ *
+ * A NULL pointer is written as an empty string. For fields that are not
+ * PB_ATYPE_POINTER the string must be terminated within data_size - 1 bytes,
+ * otherwise encoding fails with "unterminated string". When
+ * PB_VALIDATE_UTF8 is defined, non-NULL strings are also checked with
+ * pb_validate_utf8().
+ */
+static bool checkreturn pb_enc_string(pb_ostream_t *stream, const pb_field_iter_t *field)
+{
+    size_t size = 0;
+    size_t max_size = (size_t)field->data_size;
+    const char *str = (const char*)field->pData;
+    
+    if (PB_ATYPE(field->type) == PB_ATYPE_POINTER)
+    {
+        /* Pointer strings carry no fixed capacity. */
+        max_size = (size_t)-1;
+    }
+    else
+    {
+        /* pb_dec_string() assumes string fields end with a null
+         * terminator when the type isn't PB_ATYPE_POINTER, so we
+         * shouldn't allow more than max-1 bytes to be written to
+         * allow space for the null terminator.
+         */
+        if (max_size == 0)
+            PB_RETURN_ERROR(stream, "zero-length string");
+
+        max_size -= 1;
+    }
+
+
+    if (str == NULL)
+    {
+        size = 0; /* Treat null pointer as an empty string */
+    }
+    else
+    {
+        const char *p = str;
+
+        /* strnlen() is not always available, so just use a loop */
+        while (size < max_size && *p != '\0')
+        {
+            size++;
+            p++;
+        }
+
+        if (*p != '\0')
+        {
+            PB_RETURN_ERROR(stream, "unterminated string");
+        }
+
+#ifdef PB_VALIDATE_UTF8
+        /* Only validate real strings: a NULL pointer already stands for the
+         * empty string and must not be handed to the validator. */
+        if (!pb_validate_utf8(str))
+            PB_RETURN_ERROR(stream, "invalid utf8");
+#endif
+    }
+
+    return pb_encode_string(stream, (const pb_byte_t*)str, size);
+}
+
+/* Encode a submessage field. For PB_LTYPE_SUBMSG_W_CB fields with a non-NULL
+ * pSize, the message-level encode callback (if set) runs first; the
+ * submessage itself is then written length-delimited. */
+static bool checkreturn pb_enc_submessage(pb_ostream_t *stream, const pb_field_iter_t *field)
+{
+    if (field->submsg_desc == NULL)
+        PB_RETURN_ERROR(stream, "invalid field descriptor");
+
+    if (field->pSize != NULL && PB_LTYPE(field->type) == PB_LTYPE_SUBMSG_W_CB)
+    {
+        /* Message callback is stored right before pSize. */
+        pb_callback_t *cb = (pb_callback_t*)field->pSize - 1;
+
+        if (cb->funcs.encode && !cb->funcs.encode(stream, field, &cb->arg))
+            return false;
+    }
+
+    return pb_encode_submessage(stream, field->submsg_desc, field->pData);
+}
+
+/* Encode a fixed-length bytes field: all data_size bytes are always written. */
+static bool checkreturn pb_enc_fixed_length_bytes(pb_ostream_t *stream, const pb_field_iter_t *field)
+{
+    const pb_byte_t *payload = (const pb_byte_t*)field->pData;
+    size_t length = (size_t)field->data_size;
+
+    return pb_encode_string(stream, payload, length);
+}
+
+#ifdef PB_CONVERT_DOUBLE_FLOAT
+/* Write a float to the stream as a 64-bit fixed value laid out like a double.
+ *
+ * The float's bits are split into a 1-bit sign, an 8-bit exponent (bias 127)
+ * and a 23-bit mantissa, then reassembled as a 1-bit sign, an 11-bit
+ * exponent (bias 1023) and a 52-bit mantissa. The conversion uses integer
+ * operations only. NOTE(review): this assumes 'float' uses the IEEE 754
+ * single-precision layout on the target.
+ */
+bool pb_encode_float_as_double(pb_ostream_t *stream, float value)
+{
+    union { float f; uint32_t i; } in; /* reinterpret the float's bits */
+    uint_least8_t sign;
+    int exponent;
+    uint64_t mantissa;
+
+    in.f = value;
+
+    /* Decompose input value */
+    sign = (uint_least8_t)((in.i >> 31) & 1);
+    exponent = (int)((in.i >> 23) & 0xFF) - 127; /* remove the float bias */
+    mantissa = in.i & 0x7FFFFF;
+
+    if (exponent == 128)
+    {
+        /* Special value (NaN etc.) */
+        /* 1024 + 1023 gives an all-ones 11-bit exponent field below. */
+        exponent = 1024;
+    }
+    else if (exponent == -127)
+    {
+        if (!mantissa)
+        {
+            /* Zero */
+            /* -1023 + 1023 gives an all-zero exponent field below. */
+            exponent = -1023;
+        }
+        else
+        {
+            /* Denormalized */
+            /* Shift the mantissa left until its leading one reaches bit 23,
+             * lowering the exponent for each extra shift, then drop that
+             * leading one so only the 23 fraction bits remain. */
+            mantissa <<= 1;
+            while (!(mantissa & 0x800000))
+            {
+                mantissa <<= 1;
+                exponent--;
+            }
+            mantissa &= 0x7FFFFF;
+        }
+    }
+
+    /* Combine fields */
+    mantissa <<= 29; /* widen the fraction from 23 to 52 bits */
+    mantissa |= (uint64_t)(exponent + 1023) << 52;
+    mantissa |= (uint64_t)sign << 63;
+
+    return pb_encode_fixed64(stream, &mantissa);
+}
+#endif

diff --git a/security/container/protos/nanopb/pb_encode.h b/security/container/protos/nanopb/pb_encode.h
new file mode 100644
index 0000000..9cff22a
--- /dev/null
+++ b/security/container/protos/nanopb/pb_encode.h

@@ -0,0 +1,185 @@
+/* pb_encode.h: Functions to encode protocol buffers. Depends on pb_encode.c.
+ * The main function is pb_encode. You also need an output stream, and the
+ * field descriptions created by nanopb_generator.py.
+ */
+
+#ifndef PB_ENCODE_H_INCLUDED
+#define PB_ENCODE_H_INCLUDED
+
+#include "pb.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Structure for defining custom output streams. You will need to provide
+ * a callback function to write the bytes to your storage, which can be
+ * for example a file or a network socket.
+ *
+ * The callback must conform to these rules:
+ *
+ * 1) Return false on IO errors. This will cause encoding to abort.
+ * 2) You can use state to store your own data (e.g. buffer pointer).
+ * 3) pb_write will update bytes_written after your callback runs.
+ * 4) Substreams will modify max_size and bytes_written. Don't use them
+ *    to calculate any pointers.
+ *
+ * A stream whose callback is NULL is a sizing stream: nothing is stored and
+ * only bytes_written is meaningful afterwards.
+ */
+struct pb_ostream_s
+{
+#ifdef PB_BUFFER_ONLY
+    /* Callback pointer is not used in buffer-only configuration.
+     * Having an int pointer here allows binary compatibility but
+     * gives an error if someone tries to assign callback function.
+     * Also, NULL pointer marks a 'sizing stream' that does not
+     * write anything.
+     */
+    int *callback;
+#else
+    /* Writes 'count' bytes from 'buf'; see the rules above. */
+    bool (*callback)(pb_ostream_t *stream, const pb_byte_t *buf, size_t count);
+#endif
+    void *state;          /* Free field for use by callback implementation. */
+    size_t max_size;      /* Limit number of output bytes written (or use SIZE_MAX). */
+    size_t bytes_written; /* Number of bytes written so far. */
+    
+#ifndef PB_NO_ERRMSG
+    /* Error message slot; compiled out when PB_NO_ERRMSG is defined. */
+    const char *errmsg;
+#endif
+};
+
+/***************************
+ * Main encoding functions *
+ ***************************/
+
+/* Encode a single protocol buffers message from C structure into a stream.
+ * Returns true on success, false on any failure.
+ * The actual struct pointed to by src_struct must match the description in fields.
+ * All required fields in the struct are assumed to have been filled in.
+ *
+ * Example usage:
+ *    MyMessage msg = {};
+ *    uint8_t buffer[64];
+ *    pb_ostream_t stream;
+ *
+ *    msg.field1 = 42;
+ *    stream = pb_ostream_from_buffer(buffer, sizeof(buffer));
+ *    pb_encode(&stream, MyMessage_fields, &msg);
+ */
+bool pb_encode(pb_ostream_t *stream, const pb_msgdesc_t *fields, const void *src_struct);
+
+/* Extended version of pb_encode, with several options to control the
+ * encoding process:
+ *
+ * PB_ENCODE_DELIMITED:      Prepend the length of message as a varint.
+ *                           Corresponds to writeDelimitedTo() in Google's
+ *                           protobuf API.
+ *
+ * PB_ENCODE_NULLTERMINATED: Append a null byte to the message for termination.
+ *                           NOTE: This behaviour is not supported in most other
+ *                           protobuf implementations, so PB_ENCODE_DELIMITED
+ *                           is a better option for compatibility.
+ */
+#define PB_ENCODE_DELIMITED       0x02U
+#define PB_ENCODE_NULLTERMINATED  0x04U
+bool pb_encode_ex(pb_ostream_t *stream, const pb_msgdesc_t *fields, const void *src_struct, unsigned int flags);
+
+/* Defines for backwards compatibility with code written before nanopb-0.4.0 */
+#define pb_encode_delimited(s,f,d) pb_encode_ex(s,f,d, PB_ENCODE_DELIMITED)
+#define pb_encode_nullterminated(s,f,d) pb_encode_ex(s,f,d, PB_ENCODE_NULLTERMINATED)
+
+/* Encode the message to get the size of the encoded data, but do not store
+ * the data. */
+bool pb_get_encoded_size(size_t *size, const pb_msgdesc_t *fields, const void *src_struct);
+
+/**************************************
+ * Functions for manipulating streams *
+ **************************************/
+
+/* Create an output stream for writing into a memory buffer.
+ * The number of bytes written can be found in stream.bytes_written after
+ * encoding the message.
+ *
+ * Alternatively, you can use a custom stream that writes directly to e.g.
+ * a file or a network socket.
+ */
+pb_ostream_t pb_ostream_from_buffer(pb_byte_t *buf, size_t bufsize);
+
+/* Pseudo-stream for measuring the size of a message without actually storing
+ * the encoded data.
+ * 
+ * Example usage:
+ *    MyMessage msg = {};
+ *    pb_ostream_t stream = PB_OSTREAM_SIZING;
+ *    pb_encode(&stream, MyMessage_fields, &msg);
+ *    printf("Message size is %zu\n", stream.bytes_written);
+ */
+#ifndef PB_NO_ERRMSG
+#define PB_OSTREAM_SIZING {0,0,0,0,0}
+#else
+#define PB_OSTREAM_SIZING {0,0,0,0}
+#endif
+
+/* Function to write into a pb_ostream_t stream. You can use this if you need
+ * to append or prepend some custom headers to the message.
+ */
+bool pb_write(pb_ostream_t *stream, const pb_byte_t *buf, size_t count);
+
+
+/************************************************
+ * Helper functions for writing field callbacks *
+ ************************************************/
+
+/* Encode field header based on type and field number defined in the field
+ * structure. Call this from the callback before writing out field contents. */
+bool pb_encode_tag_for_field(pb_ostream_t *stream, const pb_field_iter_t *field);
+
+/* Encode field header by manually specifying wire type. You need to use this
+ * if you want to write out packed arrays from a callback field. */
+bool pb_encode_tag(pb_ostream_t *stream, pb_wire_type_t wiretype, uint32_t field_number);
+
+/* Encode an integer in the varint format.
+ * This works for bool, enum, int32, int64, uint32 and uint64 field types. */
+#ifndef PB_WITHOUT_64BIT
+bool pb_encode_varint(pb_ostream_t *stream, uint64_t value);
+#else
+bool pb_encode_varint(pb_ostream_t *stream, uint32_t value);
+#endif
+
+/* Encode an integer in the zig-zagged svarint format.
+ * This works for sint32 and sint64. */
+#ifndef PB_WITHOUT_64BIT
+bool pb_encode_svarint(pb_ostream_t *stream, int64_t value);
+#else
+bool pb_encode_svarint(pb_ostream_t *stream, int32_t value);
+#endif
+
+/* Encode a string or bytes type field. For strings, pass strlen(s) as size. */
+bool pb_encode_string(pb_ostream_t *stream, const pb_byte_t *buffer, size_t size);
+
+/* Encode a fixed32, sfixed32 or float value.
+ * You need to pass a pointer to a 4-byte wide C variable. */
+bool pb_encode_fixed32(pb_ostream_t *stream, const void *value);
+
+#ifndef PB_WITHOUT_64BIT
+/* Encode a fixed64, sfixed64 or double value.
+ * You need to pass a pointer to a 8-byte wide C variable. */
+bool pb_encode_fixed64(pb_ostream_t *stream, const void *value);
+#endif
+
+#ifdef PB_CONVERT_DOUBLE_FLOAT
+/* Encode a float value so that it appears like a double in the encoded
+ * message. */
+bool pb_encode_float_as_double(pb_ostream_t *stream, float value);
+#endif
+
+/* Encode a submessage field.
+ * You need to pass the message descriptor and pointer to struct, just like
+ * with pb_encode(). This internally encodes the submessage twice, first to
+ * calculate message size and then to actually write it out.
+ */
+bool pb_encode_submessage(pb_ostream_t *stream, const pb_msgdesc_t *fields, const void *src_struct);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif

diff --git a/security/container/protos/pbsystem.h b/security/container/protos/pbsystem.h
new file mode 100644
index 0000000..f2308f8
--- /dev/null
+++ b/security/container/protos/pbsystem.h

@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Header and types for nanopb to work with the Linux kernel.
+ *
+ * Defines the "least" and "fast" integer typedefs that the nanopb sources
+ * refer to (for example uint_least8_t and int_least16_t in pb_encode.c).
+ */
+#ifndef PBSYSTEM_H_INCLUDED
+#define PBSYSTEM_H_INCLUDED
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+/* Small types.  */
+
+/* Signed.  */
+typedef signed char		int_least8_t;
+typedef short int		int_least16_t;
+typedef int			int_least32_t;
+/* long long rather than long: long is only 32 bits wide on 32-bit targets. */
+typedef long long int		int_least64_t;
+
+/* Unsigned.  */
+typedef unsigned char		uint_least8_t;
+typedef unsigned short int	uint_least16_t;
+typedef unsigned int		uint_least32_t;
+typedef unsigned long long int	uint_least64_t;
+
+/* Fast types.  */
+
+/* Signed.  */
+typedef signed char		int_fast8_t;
+typedef long int		int_fast16_t;
+typedef long int		int_fast32_t;
+typedef long long int		int_fast64_t;
+
+/* Unsigned.  */
+typedef unsigned char		uint_fast8_t;
+typedef unsigned long int	uint_fast16_t;
+typedef unsigned long int	uint_fast32_t;
+typedef unsigned long long int	uint_fast64_t;
+
+#endif /* PBSYSTEM_H_INCLUDED */

diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c
index ee5cb94..df744a9 100644
--- a/security/loadpin/loadpin.c
+++ b/security/loadpin/loadpin.c

@@ -237,7 +237,7 @@
 };
 
 /* Should not be mutable after boot, so not listed in sysfs (perm == 0). */
-module_param(enforce, int, 0);
-MODULE_PARM_DESC(enforce, "Enforce module/firmware pinning");
+module_param_named(enabled, enforce, int, 0);
+MODULE_PARM_DESC(enabled, "Enforce module/firmware pinning");
 module_param_array_named(exclude, exclude_read_files, charp, NULL, 0);
 MODULE_PARM_DESC(exclude, "Exclude pinning specific read file types");

diff --git a/security/security.c b/security/security.c
index 1bc000f..a21fae9 100644
--- a/security/security.c
+++ b/security/security.c

@@ -1404,6 +1404,11 @@
 	}
 }
 
+/* Dispatch the file_pre_free_security LSM hook for @file; returns nothing. */
+void security_file_pre_free(struct file *file)
+{
+	call_void_hook(file_pre_free_security, file);
+}
+
 int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	return call_int_hook(file_ioctl, 0, file, cmd, arg);
@@ -1513,6 +1518,11 @@
 	return rc;
 }
 
+/* Dispatch the task_post_alloc LSM hook for @task; returns nothing. */
+void security_task_post_alloc(struct task_struct *task)
+{
+	call_void_hook(task_post_alloc, task);
+}
+
 void security_task_free(struct task_struct *task)
 {
 	call_void_hook(task_free, task);
@@ -1704,6 +1714,11 @@
 	return call_int_hook(task_kill, 0, p, info, sig, cred);
 }
 
+/* Dispatch the task_exit LSM hook for task @p; returns nothing. */
+void security_task_exit(struct task_struct *p)
+{
+	call_void_hook(task_exit, p);
+}
+
 int security_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 			 unsigned long arg4, unsigned long arg5)
 {
@@ -2404,3 +2419,30 @@
 	return call_int_hook(locked_down, 0, what);
 }
 EXPORT_SYMBOL(security_locked_down);
+
+#ifdef CONFIG_PERF_EVENTS
+/*
+ * perf_event entry points of the LSM framework. Each one forwards its
+ * arguments to the hook of the same name; the int-returning variants pass 0
+ * to call_int_hook() as the default result.
+ */
+
+/* Dispatch the perf_event_open hook for @attr with request type @type. */
+int security_perf_event_open(struct perf_event_attr *attr, int type)
+{
+	return call_int_hook(perf_event_open, 0, attr, type);
+}
+
+/* Dispatch the perf_event_alloc hook for @event. */
+int security_perf_event_alloc(struct perf_event *event)
+{
+	return call_int_hook(perf_event_alloc, 0, event);
+}
+
+/* Dispatch the perf_event_free hook for @event; returns nothing. */
+void security_perf_event_free(struct perf_event *event)
+{
+	call_void_hook(perf_event_free, event);
+}
+
+/* Dispatch the perf_event_read hook for @event. */
+int security_perf_event_read(struct perf_event *event)
+{
+	return call_int_hook(perf_event_read, 0, event);
+}
+
+/* Dispatch the perf_event_write hook for @event. */
+int security_perf_event_write(struct perf_event *event)
+{
+	return call_int_hook(perf_event_write, 0, event);
+}
+#endif /* CONFIG_PERF_EVENTS */

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 717a398..94dda93 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c

@@ -6847,6 +6847,67 @@
 	.lbs_msg_msg = sizeof(struct msg_security_struct),
 };
 
+#ifdef CONFIG_PERF_EVENTS
+/*
+ * Translate the PERF_SECURITY_* request @type into the matching perf_event
+ * class permission and check it with avc_has_perm(), using current_sid() as
+ * both source and target SID. Returns -EINVAL for an unrecognised @type,
+ * otherwise the result of avc_has_perm(). @attr is not examined here.
+ */
+static int selinux_perf_event_open(struct perf_event_attr *attr, int type)
+{
+	u32 requested, sid = current_sid();
+
+	if (type == PERF_SECURITY_OPEN)
+		requested = PERF_EVENT__OPEN;
+	else if (type == PERF_SECURITY_CPU)
+		requested = PERF_EVENT__CPU;
+	else if (type == PERF_SECURITY_KERNEL)
+		requested = PERF_EVENT__KERNEL;
+	else if (type == PERF_SECURITY_TRACEPOINT)
+		requested = PERF_EVENT__TRACEPOINT;
+	else
+		return -EINVAL;
+
+	return avc_has_perm(&selinux_state, sid, sid, SECCLASS_PERF_EVENT,
+			    requested, NULL);
+}
+
+/*
+ * Allocate a zeroed perf_event_security_struct, store current_sid() in it
+ * and attach it to @event->security. Returns 0 on success or -ENOMEM if the
+ * allocation fails.
+ */
+static int selinux_perf_event_alloc(struct perf_event *event)
+{
+	struct perf_event_security_struct *perfsec;
+
+	perfsec = kzalloc(sizeof(*perfsec), GFP_KERNEL);
+	if (!perfsec)
+		return -ENOMEM;
+
+	perfsec->sid = current_sid();
+	event->security = perfsec;
+
+	return 0;
+}
+
+/* Detach the security blob from @event and free it. */
+static void selinux_perf_event_free(struct perf_event *event)
+{
+	struct perf_event_security_struct *perfsec = event->security;
+
+	/* Clear the pointer before freeing so @event never refers to freed memory. */
+	event->security = NULL;
+	kfree(perfsec);
+}
+
+/*
+ * Check PERF_EVENT__READ with current_sid() as the source and the SID stored
+ * in @event's security blob as the target. Returns the avc_has_perm() result.
+ */
+static int selinux_perf_event_read(struct perf_event *event)
+{
+	struct perf_event_security_struct *perfsec = event->security;
+	u32 sid = current_sid();
+
+	return avc_has_perm(&selinux_state, sid, perfsec->sid,
+			    SECCLASS_PERF_EVENT, PERF_EVENT__READ, NULL);
+}
+
+/*
+ * Check PERF_EVENT__WRITE with current_sid() as the source and the SID stored
+ * in @event's security blob as the target. Returns the avc_has_perm() result.
+ */
+static int selinux_perf_event_write(struct perf_event *event)
+{
+	struct perf_event_security_struct *perfsec = event->security;
+	u32 sid = current_sid();
+
+	return avc_has_perm(&selinux_state, sid, perfsec->sid,
+			    SECCLASS_PERF_EVENT, PERF_EVENT__WRITE, NULL);
+}
+#endif
+
 static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(binder_set_context_mgr, selinux_binder_set_context_mgr),
 	LSM_HOOK_INIT(binder_transaction, selinux_binder_transaction),
@@ -7084,6 +7145,14 @@
 	LSM_HOOK_INIT(bpf_map_free_security, selinux_bpf_map_free),
 	LSM_HOOK_INIT(bpf_prog_free_security, selinux_bpf_prog_free),
 #endif
+
+#ifdef CONFIG_PERF_EVENTS
+	LSM_HOOK_INIT(perf_event_open, selinux_perf_event_open),
+	LSM_HOOK_INIT(perf_event_alloc, selinux_perf_event_alloc),
+	LSM_HOOK_INIT(perf_event_free, selinux_perf_event_free),
+	LSM_HOOK_INIT(perf_event_read, selinux_perf_event_read),
+	LSM_HOOK_INIT(perf_event_write, selinux_perf_event_write),
+#endif
 };
 
 static __init int selinux_init(void)

diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index 32e9b03..7db2485 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h

@@ -244,6 +244,8 @@
 	  {"map_create", "map_read", "map_write", "prog_load", "prog_run"} },
 	{ "xdp_socket",
 	  { COMMON_SOCK_PERMS, NULL } },
+	{ "perf_event",
+	  {"open", "cpu", "kernel", "tracepoint", "read", "write"} },
 	{ NULL }
   };
 

diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index 586b7ab..a4a86cb 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h

@@ -141,7 +141,11 @@
 };
 
 struct bpf_security_struct {
-	u32 sid;  /*SID of bpf obj creater*/
+	u32 sid;  /* SID of bpf obj creator */
+};
+
+/* SELinux state attached to a perf_event through its ->security pointer. */
+struct perf_event_security_struct {
+	u32 sid;  /* SID of perf_event obj creator */
+};
 
 extern struct lsm_blob_sizes selinux_blob_sizes;

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 63038eb..b27a109 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h

@@ -199,6 +199,16 @@
 	BPF_CGROUP_UDP6_RECVMSG,
 	BPF_CGROUP_GETSOCKOPT,
 	BPF_CGROUP_SETSOCKOPT,
+	BPF_TRACE_RAW_TP,
+	BPF_TRACE_FENTRY,
+	BPF_TRACE_FEXIT,
+	BPF_MODIFY_RETURN,
+	BPF_LSM_MAC,
+	BPF_TRACE_ITER,
+	BPF_CGROUP_INET4_GETPEERNAME,
+	BPF_CGROUP_INET6_GETPEERNAME,
+	BPF_CGROUP_INET4_GETSOCKNAME,
+	BPF_CGROUP_INET6_GETSOCKNAME,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -2750,6 +2760,231 @@
  *		**-EOPNOTSUPP** kernel configuration does not enable SYN cookies
  *
  *		**-EPROTONOSUPPORT** IP packet version is not 4 or 6
+ *
+ * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * 	Description
+ * 		Write raw *data* blob into a special BPF perf event held by
+ * 		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * 		event must have the following attributes: **PERF_SAMPLE_RAW**
+ * 		as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * 		**PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * 		The *flags* are used to indicate the index in *map* for which
+ * 		the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * 		Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * 		to indicate that the index of the current CPU core should be
+ * 		used.
+ *
+ * 		The value to write, of *size*, is passed through eBPF stack and
+ * 		pointed by *data*.
+ *
+ * 		*ctx* is a pointer to in-kernel struct sk_buff.
+ *
+ * 		This helper is similar to **bpf_perf_event_output**\ () but
+ * 		restricted to raw_tracepoint bpf programs.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
+ * 	Description
+ * 		Safely attempt to read *size* bytes from user space address
+ * 		*unsafe_ptr* and store the data in *dst*.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
+ * 	Description
+ * 		Safely attempt to read *size* bytes from kernel space address
+ * 		*unsafe_ptr* and store the data in *dst*.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
+ * 	Description
+ * 		Copy a NUL terminated string from an unsafe user address
+ * 		*unsafe_ptr* to *dst*. The *size* should include the
+ * 		terminating NUL byte. In case the string length is smaller than
+ * 		*size*, the target is not padded with further NUL bytes. If the
+ * 		string length is larger than *size*, just *size*-1 bytes are
+ * 		copied and the last byte is set to NUL.
+ *
+ * 		On success, the length of the copied string is returned. This
+ * 		makes this helper useful in tracing programs for reading
+ * 		strings, and more importantly to get its length at runtime. See
+ * 		the following snippet:
+ *
+ * 		::
+ *
+ * 			SEC("kprobe/sys_open")
+ * 			void bpf_sys_open(struct pt_regs *ctx)
+ * 			{
+ * 			        char buf[PATHLEN]; // PATHLEN is defined to 256
+ * 			        int res = bpf_probe_read_user_str(buf, sizeof(buf),
+ * 				                                  ctx->di);
+ *
+ * 				// Consume buf, for example push it to
+ * 				// userspace via bpf_perf_event_output(); we
+ * 				// can use res (the string length) as event
+ * 				// size, after checking its boundaries.
+ * 			}
+ *
+ * 		In comparison, using **bpf_probe_read_user()** helper here
+ * 		instead to read the string would require to estimate the length
+ * 		at compile time, and would often result in copying more memory
+ * 		than necessary.
+ *
+ * 		Another useful use case is when parsing individual process
+ * 		arguments or individual environment variables navigating
+ * 		*current*\ **->mm->arg_start** and *current*\
+ * 		**->mm->env_start**: using this helper and the return value,
+ * 		one can quickly iterate at the right offset of the memory area.
+ * 	Return
+ * 		On success, the strictly positive length of the string,
+ * 		including the trailing NUL character. On error, a negative
+ * 		value.
+ *
+ * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
+ * 	Description
+ * 		Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
+ * 		to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
+ * 	Return
+ * 		On success, the strictly positive length of the string,	including
+ * 		the trailing NUL character. On error, a negative value.
+ *
+ * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt)
+ *	Description
+ *		Send out a tcp-ack. *tp* is the in-kernel struct tcp_sock.
+ *		*rcv_nxt* is the ack_seq to be sent out.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_send_signal_thread(u32 sig)
+ *	Description
+ *		Send signal *sig* to the thread corresponding to the current task.
+ *	Return
+ *		0 on success or successfully queued.
+ *
+ *		**-EBUSY** if work queue under nmi is full.
+ *
+ *		**-EINVAL** if *sig* is invalid.
+ *
+ *		**-EPERM** if no permission to send the *sig*.
+ *
+ *		**-EAGAIN** if bpf program can try again.
+ *
+ * u64 bpf_jiffies64(void)
+ *	Description
+ *		Obtain the 64-bit jiffies.
+ *	Return
+ *		The 64-bit jiffies.
+ *
+ * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
+ *	Description
+ *		For an eBPF program attached to a perf event, retrieve the
+ *		branch records (struct perf_branch_entry) associated to *ctx*
+ *		and store it in the buffer pointed by *buf* up to size
+ *		*size* bytes.
+ *	Return
+ *		On success, number of bytes written to *buf*. On error, a
+ *		negative value.
+ *
+ *		The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to
+ *		instead return the number of bytes required to store all the
+ *		branch entries. If this flag is set, *buf* may be NULL.
+ *
+ *		**-EINVAL** if arguments invalid or **size** not a multiple
+ *		of sizeof(struct perf_branch_entry).
+ *
+ *		**-ENOENT** if architecture does not support branch records.
+ *
+ * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size)
+ *	Description
+ *		Returns 0 on success, values for *pid* and *tgid* as seen from the current
+ *		*namespace* will be returned in *nsdata*.
+ *
+ *		On failure, the returned value is one of the following:
+ *
+ *		**-EINVAL** if *dev* and *ino* supplied don't match dev_t and inode number
+ *		with nsfs of current task, or if dev conversion to dev_t lost high bits.
+ *
+ *		**-ENOENT** if pidns does not exist for the current task.
+ *
+ * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ *	Description
+ *		Write raw *data* blob into a special BPF perf event held by
+ *		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ *		event must have the following attributes: **PERF_SAMPLE_RAW**
+ *		as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ *		**PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ *		The *flags* are used to indicate the index in *map* for which
+ *		the value must be put, masked with **BPF_F_INDEX_MASK**.
+ *		Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ *		to indicate that the index of the current CPU core should be
+ *		used.
+ *
+ *		The value to write, of *size*, is passed through eBPF stack and
+ *		pointed by *data*.
+ *
+ *		*ctx* is a pointer to in-kernel struct xdp_buff.
+ *
+ *		This helper is similar to **bpf_perf_event_output**\ () but
+ *		restricted to raw_tracepoint bpf programs.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_get_netns_cookie(void *ctx)
+ * 	Description
+ * 		Retrieve the cookie (generated by the kernel) of the network
+ * 		namespace the input *ctx* is associated with. The network
+ * 		namespace cookie remains stable for its lifetime and provides
+ * 		a global identifier that can be assumed unique. If *ctx* is
+ * 		NULL, then the helper returns the cookie for the initial
+ * 		network namespace. The cookie itself is very similar to that
+ * 		of bpf_get_socket_cookie() helper, but for network namespaces
+ * 		instead of sockets.
+ * 	Return
+ * 		An 8-byte long opaque number.
+ *
+ * u64 bpf_get_current_ancestor_cgroup_id(int ancestor_level)
+ * 	Description
+ * 		Return id of cgroup v2 that is ancestor of the cgroup associated
+ * 		with the current task at the *ancestor_level*. The root cgroup
+ * 		is at *ancestor_level* zero and each step down the hierarchy
+ * 		increments the level. If *ancestor_level* == level of cgroup
+ * 		associated with the current task, then return value will be the
+ * 		same as that of **bpf_get_current_cgroup_id**\ ().
+ *
+ * 		The helper is useful to implement policies based on cgroups
+ * 		that are upper in hierarchy than immediate cgroup associated
+ * 		with the current task.
+ *
+ * 		The format of returned id and helper limitations are same as in
+ * 		**bpf_get_current_cgroup_id**\ ().
+ * 	Return
+ * 		The id is returned or 0 in case the id could not be retrieved.
+ *
+ * int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
+ *	Description
+ *		Assign the *sk* to the *skb*. When combined with appropriate
+ *		routing configuration to receive the packet towards the socket,
+ *		will cause *skb* to be delivered to the specified socket.
+ *		Subsequent redirection of *skb* via **bpf_redirect**\ (),
+ *		**bpf_clone_redirect**\ () or other methods outside of BPF may
+ *		interfere with successful delivery to the socket.
+ *
+ *		This operation is only valid from TC ingress path.
+ *
+ *		The *flags* argument must be zero.
+ *	Return
+ *		0 on success, or a negative errno in case of failure.
+ *
+ *		* **-EINVAL**		Unsupported flags specified.
+ *		* **-ENOENT**		Socket is unavailable for assignment.
+ *		* **-ENETUNREACH**	Socket is unreachable (wrong netns).
+ *		* **-EOPNOTSUPP**	Unsupported operation, for example a
+ *					call from outside of TC ingress.
+ *		* **-ESOCKTNOSUPPORT**	Socket type not supported (reuseport).
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2862,7 +3097,21 @@
 	FN(sk_storage_get),		\
 	FN(sk_storage_delete),		\
 	FN(send_signal),		\
-	FN(tcp_gen_syncookie),
+	FN(tcp_gen_syncookie),		\
+	FN(skb_output),			\
+	FN(probe_read_user),		\
+	FN(probe_read_kernel),		\
+	FN(probe_read_user_str),	\
+	FN(probe_read_kernel_str),	\
+	FN(tcp_send_ack),		\
+	FN(send_signal_thread),		\
+	FN(jiffies64),			\
+	FN(read_branch_records),	\
+	FN(get_ns_current_pid_tgid),	\
+	FN(xdp_output),			\
+	FN(get_netns_cookie),		\
+	FN(get_current_ancestor_cgroup_id),	\
+	FN(sk_assign),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
commit	66127a9fe0c93b4c3cc69d840d1cd7213e77a0af	[log] [tgz]
author	COS Kernel Merge Bot <cloud-image-merge-automation@prod.google.com>	Sun Oct 10 01:59:13 2021 -0700
committer	COS Kernel Merge Bot <cloud-image-merge-automation@prod.google.com>	Sun Oct 10 01:59:14 2021 -0700
tree	1657230b62b87ede5ea1cd22daa4c1c009c1971d
parent	acece984078c1e1a59a5d965a39f89f2ab29b14f [diff]
parent	faaca480fd5cd1976b6db743c43ac1f8d583de72 [diff]