blob: 362ea15d174b7c0cc87663db1b48cd8a056ef2be [file] [log] [blame] [edit]
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
This file implements the client for the COS(Container-Optimized OS) Image update control plane.
It is responsible for collecting and reporting the current instance status.
When run with the "init_device_policy" command, a default device policy will be
written to disk.
When run with the "monitor" command, it will periodically fetch update config
from the API backend by sending the instance status to the API.
When run with the "get_status" command, the current instance status is written
to stdout as a plain text formatted protobuf.
Example Output:
os_version: <
version_string: "16108.470.1"
channel: DEV
milestone: 89
>
update_status: UPDATE_STATUS_UPDATED_NEED_REBOOT
new_version: <
version_string: "16108.403.51"
>
update_check_timestamp: 1433971203
When run with the "show_config" command, the current config applied to the system
is written to stdout.
Example Output:
metrics_enabled: true
target_version_prefix: ""
update_scatter_seconds: 33
reboot_after_update: false
health_monitor_config: <
enforced: false
logging_enabled: false
monitoring_enabled: false
>
*/
package main
import (
"errors"
"flag"
"fmt"
"os"
"os/exec"
"regexp"
"strconv"
"strings"
"time"
"policy_manager/dbus"
"policy_manager/devicepolicy"
"policy_manager/imgstatus"
"policy_manager/policyenforcer"
"policy_manager/policymanagerproto"
"policy_manager/policymanagerutil"
"policy_manager/sysapi"
"policy_manager/systemd"
"github.com/golang/glog"
"github.com/golang/protobuf/proto"
)
// Per-command flag sets. Each one is created in init() and parsed by the
// handler of the command it belongs to.
var (
	// monitorModeFlags holds the flags of the "monitor" command.
	monitorModeFlags *flag.FlagSet
	// updateCheckInterval receives the value of the monitor command's
	// "update_check_interval" flag (a number of seconds).
	updateCheckInterval uint64

	// getStatusModeFlags holds the flags of the "get_status" command.
	getStatusModeFlags *flag.FlagSet

	// initDevicePolicyModeFlags holds the flags of the "init_device_policy"
	// command.
	initDevicePolicyModeFlags *flag.FlagSet

	// showConfigModeFlags holds the flags of the "show_config" command.
	showConfigModeFlags *flag.FlagSet
)

// imageNamePattern matches a full image name such as "cos-dev-93-16442-0-0".
// The capture groups are, in order: image family (cos|gci), channel
// (beta|dev|stable), milestone, and the dash-separated version.
var imageNamePattern = regexp.MustCompile(`^(cos|gci)-(beta|dev|stable)-([0-9]+)-([0-9]+-[0-9]+-[0-9]+)$`)
// Names of the commands accepted as the first positional argument.
const (
	monitorModeCommand      = "monitor"
	initDevicePolicyCommand = "init_device_policy"
	getStatusCommand        = "get_status"
	showConfigCommand       = "show_config"
)

// updateDisabledStrategy is the 'update_strategy' value of InstanceConfig
// that asks for updates to be disabled.
const updateDisabledStrategy = "update_disabled"
// init creates the flag set of every command, installs their usage messages,
// and defaults the "logtostderr" flag to true.
func init() {
	// Flags for monitor mode.
	monitorModeFlags = flag.NewFlagSet(monitorModeCommand, flag.ExitOnError)
	monitorModeFlags.Uint64Var(&updateCheckInterval,
		"update_check_interval", 3600,
		"number of seconds to wait between successive sending status ")
	monitorModeFlags.Usage = func() {
		fmt.Println("monitor changes to instance config periodically without terminating")
		// Print the flags and their default values.
		fmt.Println("Flags:")
		monitorModeFlags.PrintDefaults()
	}

	// Flags for get_status mode.
	getStatusModeFlags = flag.NewFlagSet(getStatusCommand, flag.ExitOnError)
	getStatusModeFlags.Usage = func() {
		fmt.Println("Prints the current status of the instance to stdout.")
	}

	// Flags for init_device_policy mode.
	initDevicePolicyModeFlags = flag.NewFlagSet(initDevicePolicyCommand,
		flag.ExitOnError)
	initDevicePolicyModeFlags.Usage = func() {
		fmt.Println("Initialize the device policy for the instance.")
	}

	// Flags for show_config mode.
	showConfigModeFlags = flag.NewFlagSet(showConfigCommand, flag.ExitOnError)
	showConfigModeFlags.Usage = func() {
		fmt.Println("Prints the current device policy for this instance to stdout.")
	}

	// Log to stderr only by default. "logtostderr" is a flag registered by
	// the glog package; flag.Set fails if it is not registered, and that
	// failure is reported instead of being dropped. It is written straight to
	// stderr because the logging flags have not been parsed yet at this point.
	if err := flag.Set("logtostderr", "true"); err != nil {
		fmt.Fprintf(os.Stderr, "failed to default logtostderr to true: %v\n", err)
	}
}
// getOSVersionOrDie reports the OS version of the running system. If the
// system API handler cannot be created or the version cannot be determined,
// the error is logged and the process exits.
func getOSVersionOrDie() *policymanagerproto.OSVersion {
	handler, err := sysapi.NewHandler()
	if err != nil {
		glog.Exit(err)
	}

	// Only the OS version is needed here, so the reporter is built without an
	// update-engine client or policy enforcer and with a zero instance ID.
	version, err := imgstatus.NewReporter(handler, nil, nil, 0).GetOSVersion()
	if err != nil {
		glog.Exitf("Failed to determine OS Version: %s", err)
	}
	return version
}
// resolveLocalUpdateStrategy translates the UpdateStrategy of userConfig into
// a TargetVersionPrefix and stores the result back in userConfig.
//
// It returns nil when there is nothing to resolve (userConfig is nil, an
// explicit target_version_prefix is present, or update_strategy is unset or
// empty) and when the strategy was resolved. It returns an error when
// osversion is nil or when the strategy is neither 'update_disabled' nor a
// full image name; userConfig is left unchanged in that case.
func resolveLocalUpdateStrategy(userConfig *policymanagerproto.InstanceConfig,
	osversion *policymanagerproto.OSVersion) error {
	if userConfig == nil {
		// Nothing to resolve.
		return nil
	}
	if osversion == nil {
		// This should never happen.
		return errors.New("osversion is nil, which should never happen")
	}

	// An explicit 'target_version_prefix' takes precedence over any
	// 'update_strategy'; a missing or empty strategy leaves nothing to do.
	if userConfig.TargetVersionPrefix != nil ||
		userConfig.UpdateStrategy == nil ||
		userConfig.GetUpdateStrategy() == "" {
		return nil
	}

	strategy := userConfig.GetUpdateStrategy()

	// 'update_disabled': pin target_version_prefix to the running OS version
	// so that no update is picked up.
	if strategy == updateDisabledStrategy {
		userConfig.TargetVersionPrefix = proto.String(osversion.GetVersionString())
		return nil
	}

	// Otherwise the strategy has to be an entire image name, e.g.
	// "cos-dev-93-16442-0-0".
	target, err := getOSVersionFromStrategy(strategy)
	if err != nil {
		glog.Error(err)
		// User hasn't specified any strategy we know. So cannot resolve it locally.
		return errors.New("user hasn't specified any strategy we know")
	}
	userConfig.TargetVersionPrefix = proto.String(target.GetVersionString())
	return nil
}
// resolveEnforcementConfig fills in HealthMonitorConfig.Enforced of
// userConfig, creating HealthMonitorConfig first if it is missing. A nil
// userConfig is left alone.
//
// Enforced becomes true when logging or monitoring is enabled in userConfig.
// Otherwise it takes the Enforced value of the instance config returned by
// manager, and stays false when that config cannot be read or carries no
// HealthMonitorConfig.
func resolveEnforcementConfig(userConfig *policymanagerproto.InstanceConfig, manager devicepolicy.Manager) {
	if userConfig == nil {
		return
	}
	if userConfig.HealthMonitorConfig == nil {
		userConfig.HealthMonitorConfig = &policymanagerproto.HealthMonitorConfig{}
	}
	healthCfg := userConfig.HealthMonitorConfig

	// Either feature being enabled implies enforcement; no need to look at
	// the state on disk.
	if healthCfg.GetLoggingEnabled() || healthCfg.GetMonitoringEnabled() {
		healthCfg.Enforced = proto.Bool(true)
		return
	}

	// Default to not enforced, then defer to the existing state on disk.
	healthCfg.Enforced = proto.Bool(false)
	stored, err := manager.GetInstanceConfig()
	if err != nil {
		glog.Error(err)
		return
	}
	if stored == nil || stored.HealthMonitorConfig == nil {
		return
	}
	healthCfg.Enforced = proto.Bool(stored.HealthMonitorConfig.GetEnforced())
}
// isUpdateDisabled reports whether userConfig asks for updates to be
// disabled, i.e. whether its update strategy equals updateDisabledStrategy.
// A nil userConfig yields false.
func isUpdateDisabled(userConfig *policymanagerproto.InstanceConfig) bool {
	return userConfig != nil && userConfig.GetUpdateStrategy() == updateDisabledStrategy
}
// stopUpdateEngine runs "systemctl stop update-engine", appending
// "--no-block" when noBlock is true. The command's output is forwarded to
// this process's stdout and stderr, and the error from running it is
// returned.
func stopUpdateEngine(noBlock bool) error {
	systemctl := exec.Command("systemctl", "stop", "update-engine")
	if noBlock {
		systemctl.Args = append(systemctl.Args, "--no-block")
	}
	systemctl.Stdout, systemctl.Stderr = os.Stdout, os.Stderr
	return systemctl.Run()
}
// updateInstanceConfig resolves userConfig locally and hands it to manager to
// be stored as the instance config. The steps are, in order:
//
//  1. resolve the enforcement setting (resolveEnforcementConfig);
//  2. resolve update_strategy against the OS version found in status;
//  3. stop update engine if updates are disabled (a failure to stop it is
//     only logged);
//  4. switch the update-engine channel through ueClient if needed;
//  5. store the config with manager.SetInstanceConfig.
//
// An error is returned when status is nil, when step 2 or 4 fails (the config
// is not stored in that case), or when storing the config fails. The helpers
// used in steps 1-4 accept a nil userConfig; it is passed to
// manager.SetInstanceConfig as-is.
func updateInstanceConfig(status *policymanagerproto.InstanceStatus,
	userConfig *policymanagerproto.InstanceConfig, manager devicepolicy.Manager,
	ueClient dbus.UpdateEngineClient) error {
	if status == nil {
		return fmt.Errorf("no instance status")
	}

	resolveEnforcementConfig(userConfig, manager)

	if resolveErr := resolveLocalUpdateStrategy(userConfig, status.GetOsVersion()); resolveErr != nil {
		return fmt.Errorf("error resolving update strategy (%s). Instance config will not be applied.", resolveErr)
	}

	if isUpdateDisabled(userConfig) {
		glog.Info("Detected updates disabled: stopping update engine...")
		if stopErr := stopUpdateEngine(false); stopErr != nil {
			glog.Errorf("failed to stop update engine: %v", stopErr)
		}
	}

	if switchErr := switchChannelIfNeeded(ueClient, status, userConfig); switchErr != nil {
		return fmt.Errorf("error switching channel (%s). Instance config will not be applied.", switchErr)
	}

	return manager.SetInstanceConfig(userConfig)
}
// switchChannelIfNeeded compares the channel of the image named by the
// update_strategy of userConfig with the channel of the OS version in status.
// When they differ, it asks update engine (through ueClient.SetChannel) to
// switch to "<target>-channel".
//
// Nothing happens, and nil is returned, when userConfig is nil, has no
// update_strategy, or its update_strategy is not a full image name. The error
// of a failed channel switch is logged and returned.
func switchChannelIfNeeded(ueClient dbus.UpdateEngineClient,
	status *policymanagerproto.InstanceStatus, userConfig *policymanagerproto.InstanceConfig) error {
	if userConfig == nil || userConfig.UpdateStrategy == nil {
		return nil
	}

	target, err := getOSVersionFromStrategy(userConfig.GetUpdateStrategy())
	if err != nil {
		// The strategy does not name an image, so no channel can be derived
		// from it.
		return nil
	}

	currentChannel := strings.ToLower(
		policymanagerproto.ReleaseChannel.String(status.GetOsVersion().GetChannel()))
	targetChannel := strings.ToLower(
		policymanagerproto.ReleaseChannel.String(target.GetChannel()))
	if currentChannel == targetChannel {
		return nil
	}

	if err := ueClient.SetChannel(fmt.Sprintf("%s-channel", targetChannel)); err != nil {
		glog.Errorf("channel switch from %s to %s failed: %s.",
			currentChannel, targetChannel, err)
		return err
	}
	glog.Infof("channel switch from %s to %s succeeded.",
		currentChannel, targetChannel)
	return nil
}
// getOSVersionFromStrategy parses an update_strategy that is a full image
// name (e.g. "cos-dev-93-16442-0-0") and returns the OS version it names:
// channel, milestone and version string (dashes replaced by dots, e.g.
// "16442.0.0").
//
// An error is returned when strategy does not match imageNamePattern or when
// its milestone does not fit in 32 bits.
func getOSVersionFromStrategy(strategy string) (*policymanagerproto.OSVersion, error) {
	match := imageNamePattern.FindStringSubmatch(strategy)
	if match == nil {
		return nil, fmt.Errorf("error parsing update_strategy: %s", strategy)
	}

	// The pattern guarantees decimal digits but does not bound how many, so
	// the conversion can still fail with a range error. Reject the strategy
	// rather than silently using a clamped milestone.
	milestone, err := strconv.ParseUint(match[3], 10, 32)
	if err != nil {
		return nil, fmt.Errorf("error parsing milestone of update_strategy %s: %v", strategy, err)
	}

	// NOTE(review): a channel name missing from ReleaseChannel_value maps to
	// the zero value here; confirm the enum defines BETA, DEV and STABLE.
	channelValue := policymanagerproto.ReleaseChannel_value[strings.ToUpper(match[2])]
	channel := policymanagerproto.ReleaseChannel(channelValue)
	versionString := strings.Replace(match[4], "-", ".", -1)

	osVersion := new(policymanagerproto.OSVersion)
	osVersion.VersionString = proto.String(versionString)
	osVersion.Milestone = proto.Uint32(uint32(milestone))
	osVersion.Channel = &channel
	return osVersion, nil
}
// processUEStatus reacts to an update-engine status. Only
// UPDATE_STATUS_UPDATED_NEED_REBOOT is acted upon; every other status returns
// nil immediately.
//
// For that status the instance config is read from manager, and then:
//   - if reboot_after_update is false, a hint to reboot manually is logged;
//   - if target_version_prefix is empty or equal to the newly installed
//     version, the machine is rebooted via "/sbin/shutdown -r now" run
//     through apiHandler;
//   - otherwise an error describing the mismatch is returned.
//
// Errors are also returned when the instance config cannot be read or the
// reboot command fails.
//
// NOTE(review): target_version_prefix is compared for exact equality with the
// new version, not as a prefix - confirm this is intended.
func processUEStatus(ueStatus dbus.UEGetStatusResponse, manager devicepolicy.Manager,
	apiHandler sysapi.APIHandler) error {
	if ueStatus.UpdateStatus != "UPDATE_STATUS_UPDATED_NEED_REBOOT" {
		return nil
	}

	config, err := manager.GetInstanceConfig()
	if err != nil {
		return fmt.Errorf("failed to get Instance config: %v, "+
			"ignore UPDATED_NEED_REBOOT", err)
	}

	if !config.GetRebootAfterUpdate() {
		// Infof, not Info: the message carries a formatting verb.
		glog.Infof("New version %v is installed but not rebooting. "+
			"Please reboot manually.", ueStatus.NewVersion)
		return nil
	}

	targetPrefix := config.GetTargetVersionPrefix()
	if targetPrefix != "" && targetPrefix != ueStatus.NewVersion {
		return fmt.Errorf(
			"installed new version %v doesn't match "+
				"instance config target_version_prefix %v, "+
				"ignore UPDATED_NEED_REBOOT",
			ueStatus.NewVersion, targetPrefix)
	}

	glog.Infof("New COS version %v has been installed. Rebooting...", ueStatus.NewVersion)
	if _, _, err := apiHandler.RunCommand("/sbin/shutdown", "-r", "now"); err != nil {
		return fmt.Errorf("reboot attempt failed: %v", err)
	}
	return nil
}
// initDevicePolicyOrDie initializes the device policy through
// manager.InitDevicePolicy, using the user config fetched from GCE metadata
// after resolving its enforcement setting and update strategy locally.
//
// A failure to resolve the update strategy or to stop update engine is only
// logged. If the OS version cannot be determined or InitDevicePolicy fails,
// the error is logged and the process exits.
func initDevicePolicyOrDie(manager devicepolicy.Manager) {
	// Fetch any user specified config and use it to initialize the devicepolicy.
	retriever := policymanagerutil.NewMetadataRetriever(policymanagerutil.GCEMetadataURL)
	userConfig := policymanagerutil.FetchUserConfig(retriever)
	osversion := getOSVersionOrDie()

	resolveEnforcementConfig(userConfig, manager)

	// An unresolvable strategy must not prevent initialization: report it and
	// carry on with the config as it is.
	if err := resolveLocalUpdateStrategy(userConfig, osversion); err != nil {
		glog.Warningf("failed to resolve update strategy locally: %v", err)
	}

	if isUpdateDisabled(userConfig) {
		glog.Info("Detected updates disabled: stopping update engine...")
		// Use --no-block because update-engine depends on init-device-policy
		if err := stopUpdateEngine(true); err != nil {
			glog.Errorf("failed to stop update-engine: %v", err)
		}
	}

	if err := manager.InitDevicePolicy(userConfig); err != nil {
		glog.Exit(err)
	}
}
// handleInitDevicePolicyCmd implements the "init_device_policy" command: it
// parses args, builds a device policy manager on top of the system API
// handler and initializes the device policy. Any failure is logged and ends
// the process.
func handleInitDevicePolicyCmd(args []string) {
	glog.Info("Started in init device policy mode")

	// The command has no flags of its own; parsing is what handles "--help".
	parseErr := initDevicePolicyModeFlags.Parse(args)
	if parseErr != nil {
		glog.Exit(parseErr)
	}

	handler, handlerErr := sysapi.NewHandler()
	if handlerErr != nil {
		glog.Exit(handlerErr)
	}

	initDevicePolicyOrDie(devicepolicy.NewManager(handler))
	glog.Info("Device policy initialized successfully!")
}
// handleMonitorCmd implements the "monitor" command. After parsing args it
// waits until the instance ID can be read from metadata (retrying once a
// minute), initializes the device policy, and then loops forever over two
// event sources:
//
//   - user config updates: the new config is applied with
//     updateInstanceConfig, update engine is asked to attempt an update, and
//     the health monitor state is refreshed in a separate goroutine;
//   - update-engine status updates: UPDATE_STATUS_UPDATED_NEED_REBOOT is
//     passed to processUEStatus, everything else is ignored.
//
// Setup failures are logged and end the process; failures inside the loop are
// only logged.
func handleMonitorCmd(args []string) {
	glog.Info("Started in monitor mode")
	if parseErr := monitorModeFlags.Parse(args); parseErr != nil {
		glog.Exit(parseErr)
	}

	// Get instance ID, retrying until the metadata server provides it.
	retriever := policymanagerutil.NewMetadataRetriever(policymanagerutil.GCEMetadataURL)
	var instanceID uint64
	for {
		id, idErr := retriever.GetInstanceID()
		if idErr != nil {
			glog.Error(idErr)
			time.Sleep(1 * time.Minute)
			continue
		}
		instanceID = id
		break
	}
	glog.Infof("Detected instance ID is: %d", instanceID)

	apiHandler, err := sysapi.NewHandler()
	if err != nil {
		glog.Exit(err)
	}
	manager := devicepolicy.NewManager(apiHandler)
	initDevicePolicyOrDie(manager)

	policyEnforcer := policyenforcer.NewPolicyEnforcer(
		systemd.NewSystemdClient(apiHandler), manager, policyenforcer.NewHealthMonitorNPD())

	ueClient, err := dbus.NewUpdateEngineClient()
	if err != nil {
		glog.Exit(err)
	}
	reporter := imgstatus.NewReporter(apiHandler, ueClient, policyEnforcer, instanceID)

	userConfigUpdate := policymanagerutil.SubscribeUserConfig(retriever)
	ueStatusUpdate, err := ueClient.SubscribeStatusUpdate()
	if err != nil {
		glog.Exit(err)
	}

	for {
		select {
		case userConfig := <-userConfigUpdate:
			// userConfigUpdate returns the latest value of user config when the metadata are updated.
			status, statusErr := reporter.GetStatus()
			if statusErr != nil {
				glog.Error(statusErr)
			}
			glog.Infof("Using InstanceConfig: %v", userConfig)
			if applyErr := updateInstanceConfig(status, userConfig, manager, ueClient); applyErr != nil {
				glog.Warning(applyErr)
			}
			// Send an update check request to update-engine.
			if attemptErr := ueClient.AttemptUpdate(); attemptErr != nil {
				glog.Warning(attemptErr)
			}
			// Refresh the health monitor state without holding up the loop.
			go func() {
				if refreshErr := policyEnforcer.UpdateHealthMonitorState(); refreshErr != nil {
					glog.Error(refreshErr)
				}
			}()
		case ueStatus := <-ueStatusUpdate:
			glog.V(1).Infof("Received status update from update-engine: %+v", ueStatus)
			// Only UPDATED_NEED_REBOOT is of interest; skip anything else.
			if ueStatus.UpdateStatus != "UPDATE_STATUS_UPDATED_NEED_REBOOT" {
				continue
			}
			if processErr := processUEStatus(ueStatus, manager, apiHandler); processErr != nil {
				glog.Error(processErr)
			}
		}
	}
}
// handleGetStatusCmd implements the "get_status" command: it collects the
// current instance status through an imgstatus reporter and prints it to
// stdout in protobuf text format.
//
// Failing to create the system API handler or the update-engine client is
// logged and ends the process. Failing to read the instance ID or the status
// is only logged; whatever status was obtained is still printed.
func handleGetStatusCmd(args []string) {
	glog.Info("Started in get status mode")

	// The command has no flags of its own; parsing is what handles "--help".
	if parseErr := getStatusModeFlags.Parse(args); parseErr != nil {
		glog.Exit(parseErr)
	}

	apiHandler, err := sysapi.NewHandler()
	if err != nil {
		glog.Exit(err)
	}
	ueClient, err := dbus.NewUpdateEngineClient()
	if err != nil {
		glog.Exit(err)
	}

	// This enforcer is created without a device policy manager.
	npd := policyenforcer.NewHealthMonitorNPD()
	enforcer := policyenforcer.NewPolicyEnforcer(
		systemd.NewSystemdClient(apiHandler), nil, npd)

	// Best effort: without an instance ID the status is still reported.
	metadata := policymanagerutil.NewMetadataRetriever(policymanagerutil.GCEMetadataURL)
	instanceID, err := metadata.GetInstanceID()
	if err != nil {
		glog.Error(err)
	}

	status, err := imgstatus.NewReporter(apiHandler, ueClient, enforcer, instanceID).GetStatus()
	if err != nil {
		glog.Error(err)
	}
	fmt.Println(proto.MarshalTextString(status))
}
// handleShowConfigCmd implements the "show_config" command: it reads the
// current InstanceConfig from the device policy manager and prints it to
// stdout in protobuf text format. Any failure is logged and ends the process.
func handleShowConfigCmd(args []string) {
	// The command has no flags of its own; parsing is what handles "--help".
	if parseErr := showConfigModeFlags.Parse(args); parseErr != nil {
		glog.Exit(parseErr)
	}

	handler, err := sysapi.NewHandler()
	if err != nil {
		glog.Exit(err)
	}

	current, err := devicepolicy.NewManager(handler).GetInstanceConfig()
	if err != nil {
		glog.Exit(err)
	}
	fmt.Println(proto.MarshalTextString(current))
}
// main parses the logging flags, then dispatches on the first positional
// argument to the handler of the matching command, passing the remaining
// arguments along. With no command it prints the usage message and returns.
// With an unrecognized command it logs the error, prints the usage message
// and exits with status 2.
func main() {
	flag.Usage = func() {
		fmt.Println("Usage: device_policy_manager [logging flags] command [args...]")
		fmt.Println("")
		fmt.Println("Supported logging flags:")
		flag.PrintDefaults()
		fmt.Println("")
		fmt.Printf("Supported commands:\n%s, %s, %s, %s\n",
			monitorModeCommand, initDevicePolicyCommand,
			getStatusCommand, showConfigCommand)
	}

	// Parse the glog flags.
	flag.Parse()

	// If no flags are left after the glog flags are processed, print usage
	// message and exit.
	if flag.NArg() == 0 {
		flag.Usage()
		return
	}

	// Get the command and the arguments for the command.
	command := flag.Arg(0)
	args := flag.Args()[1:]

	switch command {
	case monitorModeCommand:
		handleMonitorCmd(args)
	case initDevicePolicyCommand:
		handleInitDevicePolicyCmd(args)
	case getStatusCommand:
		handleGetStatusCmd(args)
	case showConfigCommand:
		handleShowConfigCmd(args)
	default:
		glog.Errorf("unrecognized command: %s\n", command)
		flag.Usage()
		// Report the misuse to the caller through the exit status. os.Exit
		// does not run deferred calls, so flush buffered logs first.
		glog.Flush()
		os.Exit(2)
	}
}