Project-lakitu:NPD: Add detection of boot disk size inconsistency
BUG=b/151644632
TEST=presubmit
RELEASE_NOTE=None
Change-Id: I84b824d76ddf971604b946a0ff81bb88d75b120b
Reviewed-on: https://cos-review.googlesource.com/c/cos/overlays/board-overlays/+/9623
Tested-by: Cusky Presubmit Bot <presubmit@cos-infra-prod.iam.gserviceaccount.com>
Reviewed-by: Robert Kolchmeyer <rkolchmeyer@google.com>
Reviewed-by: Roy Yang <royyang@google.com>
diff --git a/project-lakitu/app-admin/node-problem-detector/files/boot-disk-size-consistency-monitor.json b/project-lakitu/app-admin/node-problem-detector/files/boot-disk-size-consistency-monitor.json
new file mode 100644
index 0000000..6de019f
--- /dev/null
+++ b/project-lakitu/app-admin/node-problem-detector/files/boot-disk-size-consistency-monitor.json
@@ -0,0 +1,19 @@
+{
+ "plugin": "custom",
+ "pluginConfig": {
+ "invoke_interval": "30m",
+ "timeout": "5s",
+ "max_output_length": 80,
+ "enable_message_change_based_condition_update": false
+ },
+ "source": "boot-disk-size-consistency-monitor",
+ "metricsReporting": true,
+ "rules": [
+ {
+ "type": "temporary",
+ "reason": "BootDiskSizeInconsistency",
+ "path": "/etc/node_problem_detector/check_boot_disk_size_consistency.sh",
+ "timeout": "3s"
+ }
+ ]
+}
diff --git a/project-lakitu/app-admin/node-problem-detector/files/check_boot_disk_size_consistency.sh b/project-lakitu/app-admin/node-problem-detector/files/check_boot_disk_size_consistency.sh
new file mode 100644
index 0000000..1f3ab03
--- /dev/null
+++ b/project-lakitu/app-admin/node-problem-detector/files/check_boot_disk_size_consistency.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+set -o errexit
+set -o pipefail
+
+# NPD custom plugin: detects a root filesystem that resize2fs failed to grow
+# to the size of its partition during boot. The exit code carries the result.
+readonly OK=0
+readonly NONOK=1
+readonly UNKNOWN=2
+readonly DISKFRACTIONMINIMUM=0.9
+readonly ROOTDEVICE="sda1"
+
+# Partition size in 1K blocks; exact name match so sda10+ cannot match sda1.
+requestedDiskSize="$(awk -v dev="$ROOTDEVICE" '$4 == dev {print $3; exit}' \
+  /proc/partitions)" || requestedDiskSize=""
+if [[ ! "$requestedDiskSize" =~ ^[1-9][0-9]*$ ]]; then
+  echo "Error retrieving requested disk size"
+  exit $UNKNOWN
+fi
+
+# Filesystem size in 1K blocks; a df failure (pipefail) leaves it empty.
+actualDiskSize="$(df -P "/dev/$ROOTDEVICE" | awk 'NR == 2 {print $2}')" ||
+  actualDiskSize=""
+if [[ ! "$actualDiskSize" =~ ^[0-9]+$ ]]; then
+  echo "Error retrieving actual disk size"
+  exit $UNKNOWN
+fi
+
+# A filesystem below DISKFRACTIONMINIMUM of its partition means the resize
+# failed. awk does the floating-point compare, so bc is not needed.
+if awk -v a="$actualDiskSize" -v r="$requestedDiskSize" \
+  -v m="$DISKFRACTIONMINIMUM" 'BEGIN { exit !(a / r < m) }'; then
+  echo "DiskSizeCheck failure occured"
+  exit $NONOK
+fi
+echo "DiskSizeCheck is successful"
+exit $OK
diff --git a/project-lakitu/app-admin/node-problem-detector/files/node-problem-detector.service b/project-lakitu/app-admin/node-problem-detector/files/node-problem-detector.service
index 7482232..ae92cef 100644
--- a/project-lakitu/app-admin/node-problem-detector/files/node-problem-detector.service
+++ b/project-lakitu/app-admin/node-problem-detector/files/node-problem-detector.service
@@ -11,6 +11,7 @@
--config.system-stats-monitor=/etc/node_problem_detector/system-stats-monitor.json \
--config.system-log-monitor=/etc/node_problem_detector/kernel-monitor.json \
--config.system-log-monitor=/etc/node_problem_detector/docker-monitor.json \
+ --config.custom-plugin-monitor=/etc/node_problem_detector/boot-disk-size-consistency-monitor.json \
--exporter.stackdriver=/etc/node_problem_detector/stackdriver-exporter.json \
diff --git a/project-lakitu/app-admin/node-problem-detector/node-problem-detector-0.8.5-r1.ebuild b/project-lakitu/app-admin/node-problem-detector/node-problem-detector-0.8.5-r2.ebuild
similarity index 100%
rename from project-lakitu/app-admin/node-problem-detector/node-problem-detector-0.8.5-r1.ebuild
rename to project-lakitu/app-admin/node-problem-detector/node-problem-detector-0.8.5-r2.ebuild
diff --git a/project-lakitu/app-admin/node-problem-detector/node-problem-detector-0.8.5.ebuild b/project-lakitu/app-admin/node-problem-detector/node-problem-detector-0.8.5.ebuild
index 8f4c4a8..1555fd6 100644
--- a/project-lakitu/app-admin/node-problem-detector/node-problem-detector-0.8.5.ebuild
+++ b/project-lakitu/app-admin/node-problem-detector/node-problem-detector-0.8.5.ebuild
@@ -46,8 +46,13 @@
doins "${FILESDIR}"/docker-monitor.json
doins "${FILESDIR}"/kernel-monitor.json
doins "${FILESDIR}"/stackdriver-exporter.json
+ doins "${FILESDIR}"/boot-disk-size-consistency-monitor.json
+ # install the boot disk size consistency check script with exec permission
+ insopts -m0100
+ doins "${FILESDIR}"/check_boot_disk_size_consistency.sh
dosbin bin/node-problem-detector
systemd_dounit "${FILESDIR}"/node-problem-detector.service
}
+