blob: e941abf32c7bc3fd1b7f4f5857aa1f434379cbd5 [file] [log] [blame]
// Copyright 2021 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
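// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.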
// See the License for the specific language governing permissions and
// limitations under the License.
package monitor
import (
const (
// instanceConfigFile is the file that stores the config related to
// policy manager.
instanceConfigFile = "/var/lib/devicesettings/instance_config"
// devicePolicyFile is the default file of the device policy. This file is
// used by the crash-sender service and the update-engine service to get any
// config related to them.
devicePolicyFile = "/var/lib/devicesettings/policy"
// publicKeyFile is the file containing the public key that will be used
// to verify the signature on the device policy file.
// The key will be encoded using DER-encoded PKIX format.
// Note that the file name is decided by Chrome OS.
publicKeyFile = "/var/lib/devicesettings/owner.key"
// privateKeyFile is the file containing the private key that will be
// used to sign the policy file.
// The key will be encoded using ASN.1 DER encoded form.
// Note that we are signing the policy file locally since we do not want
// to do any key management on the server side.
privateKeyFile = "/var/lib/devicesettings/private.key"
// gceMetadataURL is the URL of the metadata server where we can query
// metadata tags and their values. It is handed to
// configfetcher.PollUserConfig in HandleMonitorCmd.
// NOTE(review): the value is an empty string here - confirm the intended
// URL has not been lost.
gceMetadataURL = ""
// systemctlCmd is the command name given to systemd.NewSystemdClient.
systemctlCmd = "systemctl"
// serviceMonitor maps a logical service key to the name of the systemd unit
// behind it. The "metricsService" entry is stopped in initDevicePolicyOrDie,
// and the whole map is passed to policyEnforcer.UpdateServiceState in
// HandleMonitorCmd.
// NOTE(review): the "loggingService" entry is an empty string here - confirm
// that no unit name is expected for it.
var serviceMonitor = map[string]string{
"loggingService": "",
"monitoringService": "node-problem-detector.service",
"metricsService": "crash-reporter.service",
"updateService": "update-engine.service",
// resolveEnforcementConfig decides the value of
// userConfig.HealthMonitorConfig.Enforced.
//
// Enforced is set to true when userConfig enables logging or monitoring.
// Otherwise it is copied from the HealthMonitorConfig of the instance config
// read from instanceConfigFile, when that config and its HealthMonitorConfig
// are present.
//
// It returns an error when userConfig is nil or when
// devicepolicy.GetInstanceConfig fails.
// NOTE(review): earlier wording referred to the device policy file and said
// a missing or unparsable file is treated as "not enforced". The code reads
// instanceConfigFile and propagates the read error - confirm how
// GetInstanceConfig reports a missing file.
func resolveEnforcementConfig(userConfig *protos.InstanceConfig) error {
if userConfig == nil {
return errors.New("value of userConfig is nil")
// Make sure HealthMonitorConfig exists so the assignments below are safe.
if userConfig.HealthMonitorConfig == nil {
userConfig.HealthMonitorConfig = new(protos.HealthMonitorConfig)
// Set HealthMonitorConfig.Enforced to false as default value.
// NOTE(review): confirm this default applies on every call and not only when
// HealthMonitorConfig was nil.
userConfig.HealthMonitorConfig.Enforced = proto.Bool(false)
// Set HealthMonitorConfig.Enforced to true if either logging or monitoring is enabled and return.
if userConfig.HealthMonitorConfig.GetLoggingEnabled() || userConfig.HealthMonitorConfig.GetMonitoringEnabled() {
userConfig.HealthMonitorConfig.Enforced = proto.Bool(true)
return nil
// Read the existing state on disk.
onDiskConfig, err := devicepolicy.GetInstanceConfig(instanceConfigFile)
if err != nil {
return err
// userConfig should respect onDiskConfig when both logging and monitoring are disabled.
if onDiskConfig != nil && onDiskConfig.HealthMonitorConfig != nil {
userConfig.HealthMonitorConfig.Enforced = proto.Bool(onDiskConfig.HealthMonitorConfig.GetEnforced())
return nil
// updateInstanceConfig applies userConfig, the instance config obtained from
// GCE metadata, to the on-disk state. It first resolves the enforcement
// field with resolveEnforcementConfig and then hands the config, together
// with the policy, instance-config and key file paths, to
// devicepolicy.SetInstanceConfig.
//
// It returns the error from either step. A nil userConfig is rejected by
// resolveEnforcementConfig.
// NOTE(review): earlier wording mentioned a backend API query, a nil-tolerant
// userConfig and an initialized devicepolicy.Manager; none of these is
// visible in this function - confirm and drop if stale.
func updateInstanceConfig(userConfig *protos.InstanceConfig) error {
if err := resolveEnforcementConfig(userConfig); err != nil {
return err
return devicepolicy.SetInstanceConfig(userConfig, devicePolicyFile, instanceConfigFile, publicKeyFile, privateKeyFile)
// initDevicePolicyOrDie initializes the device policy. Its success ensures that the default
// devicepolicy file and all required keys to access it are present and consistent.
//
// Despite the name, this function does not exit the process itself: it
// returns the error, and the caller InitDevicePolicy logs it and exits.
func initDevicePolicyOrDie() error {
// Start from a config with metrics, logging, monitoring and enforcement all
// off and an empty update strategy.
initConfig := &protos.InstanceConfig{
MetricsEnabled: proto.Bool(false),
UpdateStrategy: proto.String(""),
HealthMonitorConfig: &protos.HealthMonitorConfig{
Enforced: proto.Bool(false),
LoggingEnabled: proto.Bool(false),
MonitoringEnabled: proto.Bool(false),
// With logging and monitoring both off, this takes Enforced from the
// instance config already on disk, if there is one.
if err := resolveEnforcementConfig(initConfig); err != nil {
return err
// Stopping the crash-reporter service. When GCE instance boots, crash-reporter service
// is in `Activating` state due to which it is returning error and we don't
// need this service until cos-metrics-enabled is set to true.
systemdClient := systemd.NewSystemdClient(systemctlCmd)
if err := systemdClient.StopUnit(serviceMonitor["metricsService"]); err != nil {
return err
return devicepolicy.InitDevicePolicy(initConfig, devicePolicyFile, instanceConfigFile, publicKeyFile, privateKeyFile)
// InitDevicePolicy is the handler function for initializing the device policy.
// It logs the start of initialization, runs initDevicePolicyOrDie, and on
// failure reports the error through glog.Exitf, which terminates the process.
// On success it logs a completion message.
func InitDevicePolicy() {
glog.Info("Starting device policy initialization...")
if err := initDevicePolicyOrDie(); err != nil {
glog.Exitf("Error initializing device policy: %v", err)
glog.Info("Device policy initialized successfully!")
// HandleMonitorCmd is the handler function for running Policy Manager
// in monitor mode. It notifies systemd that startup finished, fetches the
// instance ID (logging and retrying after one minute on failure), starts
// configfetcher.PollUserConfig in a goroutine, and then handles each
// InstanceConfig received on the channel: the config is applied with
// updateInstanceConfig and the service state is refreshed through
// policyEnforcer.UpdateServiceState.
// NOTE(review): earlier wording also said this mode periodically sends status
// updates; no such code is visible in this function - confirm.
func HandleMonitorCmd() {
glog.Info("Started in monitor mode")
// Send notify command to notify systemd if initialization was successful.
// A failure to notify is logged.
// NOTE(review): if sysapi.RunCommand does not go through a shell, the single
// quotes in --status become part of the status text - confirm.
if _, _, err := sysapi.RunCommand("systemd-notify", "--ready", "--status='Initialization was successful'"); err != nil {
glog.Errorf("error in notifying systemd: %s\n", err)
// Get instance ID, retrying until configfetcher.GetInstanceID succeeds.
var instanceID uint64
for {
id, err := configfetcher.GetInstanceID()
if err == nil {
instanceID = id
// NOTE(review): as shown, nothing leaves the loop after a successful fetch -
// confirm that a break follows the assignment above.
glog.Errorf("error while fetching instance id: %v", err)
time.Sleep(1 * time.Minute)
glog.Infof("Detected instance ID is: %d", instanceID)
systemdClient := systemd.NewSystemdClient(systemctlCmd)
policyEnforcer := policyenforcer.NewPolicyEnforcer(*systemdClient)
// Unbuffered channel shared with the polling goroutine started below.
userConfig := make(chan *protos.InstanceConfig)
go configfetcher.PollUserConfig(userConfig, gceMetadataURL)
for {
// Block until a config arrives on the channel (presumably sent by
// configfetcher.PollUserConfig when the metadata changes - confirm).
instanceConfig := <-userConfig
glog.Infof("Using InstanceConfig: %v", instanceConfig)
// NOTE(review): no handling for an updateInstanceConfig error is visible
// here - confirm it is logged and that the iteration is skipped.
if err := updateInstanceConfig(instanceConfig); err != nil {
if err := policyEnforcer.UpdateServiceState(instanceConfigFile, serviceMonitor); err != nil {
glog.Errorf("error while updating instance: %v", err)