blob: 20ace689ec8d046d20ff04e6ade81a108888d8c9 [file] [log] [blame] [edit]
//go:build windows
// +build windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package windows
import (
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/Microsoft/hcsshim"
"github.com/Microsoft/hcsshim/pkg/cimfs"
cimlayer "github.com/Microsoft/hcsshim/pkg/ociwclayer/cim"
"github.com/containerd/containerd/v2/core/mount"
"github.com/containerd/containerd/v2/core/snapshots"
"github.com/containerd/containerd/v2/core/snapshots/storage"
"github.com/containerd/containerd/v2/internal/kmutex"
"github.com/containerd/containerd/v2/plugins"
"github.com/containerd/continuity/fs"
"github.com/containerd/errdefs"
"github.com/containerd/log"
"github.com/containerd/platforms"
"github.com/containerd/plugin"
"github.com/containerd/plugin/registry"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/sirupsen/logrus"
)
const (
	// minimumUnformattedScratchSizeInBytes is the size used for the template unformatted
	// scratch VHD and the smallest size accepted for a custom-sized one. An unformatted
	// scratch gets formatted with ReFS inside the guest before any containers run, and
	// ReFS requires a minimum of 40GB.
	minimumUnformattedScratchSizeInBytes uint64 = 40 << 30
)
// BlockCIMSnapshotterConfig contains configuration options for the block CIM snapshotter.
// The options are read from the plugin's TOML configuration (see the struct tags).
type BlockCIMSnapshotterConfig struct {
	// EnableLayerIntegrity enables data integrity checking for CIM layers.
	// When enabled, CIMs will be verified and sealed on close to ensure tamper-proofing.
	// The snapshotter forwards this setting both as a mount option and as an option
	// to the merged CIM creation.
	EnableLayerIntegrity bool `toml:"enable_layer_integrity"`
	// AppendVHDFooter controls whether VHD footers are appended to layer CIMs and merged CIMs.
	// Like EnableLayerIntegrity, it is forwarded as a mount option and as a merge option.
	AppendVHDFooter bool `toml:"append_vhd_footer"`
	// UnformattedScratch makes the snapshotter prepare scratch VHDs without formatting
	// them with NTFS. This is mainly useful when the scratch is going to be formatted
	// inside the guest (hyperv isolation) before starting containers. Usually, this
	// option should be used together with EnableLayerIntegrity. As of now, process
	// isolated windows containers can't be run with this option enabled, since
	// nothing on the host will format the scratch.
	UnformattedScratch bool `toml:"unformatted_scratch"`
}
// blockCIMSnapshotter is a snapshotter that uses block CIMs for storing windows container image layers.
type blockCIMSnapshotter struct {
	// windowsBaseSnapshotter is declared elsewhere in this package. The methods in
	// this file use members that are not declared on this struct (root, ms, info,
	// preRemove, ...), presumably promoted from this embedded type.
	*windowsBaseSnapshotter
	// mergeLock is a keyed lock taken around merged CIM preparation. It is keyed by
	// the parent snapshot key so that concurrent requests for the same merge wait
	// for each other.
	mergeLock kmutex.KeyedLocker
	// config holds the snapshotter options supplied at construction time.
	config *BlockCIMSnapshotterConfig
}

// Compile-time assertion that blockCIMSnapshotter implements snapshots.Snapshotter.
var _ snapshots.Snapshotter = &blockCIMSnapshotter{}
func init() {
registry.Register(&plugin.Registration{
Type: plugins.SnapshotPlugin,
ID: "blockcim",
Config: &BlockCIMSnapshotterConfig{},
InitFn: func(ic *plugin.InitContext) (interface{}, error) {
ic.Meta.Platforms = []ocispec.Platform{platforms.DefaultSpec()}
config, ok := ic.Config.(*BlockCIMSnapshotterConfig)
if !ok {
return nil, fmt.Errorf("invalid block CIM snapshotter configuration")
}
log.G(ic.Context).WithField("config", config).Trace("initializing blockcim snapshotter")
return NewBlockCIMSnapshotter(ic.Properties[plugins.PropertyRootDir], config)
},
})
}
// NewBlockCIMSnapshotter creates a block CIM based snapshotter rooted at `root`.
//
// A nil `config` is treated as the zero-value configuration (all options disabled).
// If the host doesn't support block CIMs, the returned error wraps
// plugin.ErrSkipPlugin. As part of construction the template scratch VHD(s) that are
// later copied for every scratch snapshot are prepared under `root`.
func NewBlockCIMSnapshotter(root string, config *BlockCIMSnapshotterConfig) (snapshots.Snapshotter, error) {
	if !cimfs.IsBlockCimSupported() {
		return nil, fmt.Errorf("host windows version doesn't support block CIMs: %w", plugin.ErrSkipPlugin)
	}

	// The config is dereferenced below and by other methods of the snapshotter, so
	// never keep a nil pointer around.
	if config == nil {
		config = &BlockCIMSnapshotterConfig{}
	}

	baseSn, err := newBaseSnapshotter(root)
	if err != nil {
		return nil, err
	}

	if config.UnformattedScratch {
		// If UnformattedScratch is enabled, we don't need to create the differencing VHD
		// as the scratch isn't even formatted. We just create 1 VHD and copy it for every
		// snapshot. In cases where snapshot creation specifies a different size of the
		// VHD, then we create a new one because we can't expand an unformatted VHD. Also
		// note that unformatted VHDs are mostly used with ReFS, and ReFS requires a
		// minimum of 40GB space, so use that as the default.
		// Create this VHD with `templateVHDName` as that gets copied in snapshot creation.
		err = createScratchVHD(context.Background(), filepath.Join(root, templateVHDName), WithSize(minimumUnformattedScratchSizeInBytes))
	} else {
		// If UnformattedScratch is disabled, we make the base VHD and differencing VHD and
		// copy the differencing VHD for every new scratch snapshot. If a different size is
		// specified, we use ExpandSandboxSize to change the size.
		err = createDifferencingScratchVHDs(context.Background(), root)
	}
	if err != nil {
		return nil, fmt.Errorf("failed to prepare scratch VHDs: %w", err)
	}

	return &blockCIMSnapshotter{
		windowsBaseSnapshotter: baseSn,
		mergeLock:              kmutex.New(),
		config:                 config,
	}, nil
}
// getSnapshotDir returns the on-disk directory of the snapshot with the given ID,
// i.e. `<root>/snapshots/<id>`.
func (s *blockCIMSnapshotter) getSnapshotDir(id string) string {
	snapshotsRoot := filepath.Join(s.root, "snapshots")
	return filepath.Join(snapshotsRoot, id)
}
// getSingleFileCIMBlockPath returns the path of the single-file block (named
// `layerBlockName`) inside the directory of the snapshot with the given ID.
func (s *blockCIMSnapshotter) getSingleFileCIMBlockPath(id string) string {
	snDir := s.getSnapshotDir(id)
	return filepath.Join(snDir, layerBlockName)
}
// getLayerCIMPathFromCIMBlock returns the path that addresses the layer CIM (named
// `layerCIMName`) stored inside the block at `cimBlockPath`.
func (s *blockCIMSnapshotter) getLayerCIMPathFromCIMBlock(cimBlockPath string) string {
	layerCIMPath := filepath.Join(cimBlockPath, layerCIMName)
	return layerCIMPath
}
// snapshotInfoFromID returns the snapshot info for every ID in `snIDs`, in the same
// order as the IDs were given.
//
// `ctx` MUST be a transaction context. An error is returned if the ID to key map
// can't be read, if an ID has no associated key (reported as errdefs.ErrNotFound), or
// if the info of any snapshot can't be retrieved; no partial result is returned.
func (s *blockCIMSnapshotter) snapshotInfoFromID(ctx context.Context, snIDs []string) (snInfos []snapshots.Info, err error) {
	idToKey, err := storage.IDMap(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to get snapshot ID to Key map: %w", err)
	}
	if len(snIDs) == 0 {
		return nil, nil
	}

	snInfos = make([]snapshots.Info, 0, len(snIDs))
	for _, sid := range snIDs {
		// Fail explicitly on unknown IDs instead of looking up an empty key.
		key, ok := idToKey[sid]
		if !ok {
			return nil, fmt.Errorf("failed to get info for snapshot %s: no key found for snapshot ID: %w", sid, errdefs.ErrNotFound)
		}
		_, info, _, err := storage.GetInfo(ctx, key)
		if err != nil {
			return nil, fmt.Errorf("failed to get info for snapshot %s: %w", sid, err)
		}
		snInfos = append(snInfos, info)
	}
	return snInfos, nil
}
// getSnapshotBlockCIM returns the single-file BlockCIM descriptor of the snapshot
// with ID `snID`. Only committed snapshots have a block CIM; for any other kind of
// snapshot an error is returned.
func (s *blockCIMSnapshotter) getSnapshotBlockCIM(ctx context.Context, snID string, snInfo snapshots.Info) (*cimfs.BlockCIM, error) {
	switch snInfo.Kind {
	case snapshots.KindCommitted:
		blockCIM := &cimfs.BlockCIM{
			Type:      cimfs.BlockCIMTypeSingleFile,
			BlockPath: s.getSingleFileCIMBlockPath(snID),
			CimName:   layerCIMName,
		}
		return blockCIM, nil
	default:
		return nil, fmt.Errorf("requested BlockCIM of uncommitted snapshot `%s` ", snInfo.Name)
	}
}
// Usage returns the resource usage of the snapshot identified by `key`. For active
// snapshots the usage is computed from the current contents of the snapshot
// directory; for all other snapshots the usage recorded in the metadata store is
// returned.
func (s *blockCIMSnapshotter) Usage(ctx context.Context, key string) (usage snapshots.Usage, err error) {
	var (
		snID   string
		snInfo snapshots.Info
	)

	// Read the ID, info and recorded usage in a read-only transaction.
	readInfo := func(tctx context.Context) error {
		var gErr error
		snID, snInfo, usage, gErr = storage.GetInfo(tctx, key)
		return gErr
	}
	if err = s.ms.WithTransaction(ctx, false, readInfo); err != nil {
		return snapshots.Usage{}, err
	}

	if snInfo.Kind != snapshots.KindActive {
		return usage, nil
	}

	// Active snapshots are measured on disk.
	diskUsage, err := fs.DiskUsage(ctx, s.getSnapshotDir(snID))
	if err != nil {
		return snapshots.Usage{}, err
	}
	return snapshots.Usage(diskUsage), nil
}
// Prepare creates an active snapshot identified by `key` on top of `parent` and
// returns the mounts for it.
func (s *blockCIMSnapshotter) Prepare(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
	const kind = snapshots.KindActive
	return s.createSnapshot(ctx, kind, key, parent, opts)
}
// View creates a view snapshot identified by `key` on top of `parent` and returns
// the mounts for it.
func (s *blockCIMSnapshotter) View(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
	const kind = snapshots.KindView
	return s.createSnapshot(ctx, kind, key, parent, opts)
}
// Mounts returns the mounts of the existing snapshot identified by `key`. The
// snapshot is looked up in a read-only transaction and the mounts are then built
// from it.
func (s *blockCIMSnapshotter) Mounts(ctx context.Context, key string) (_ []mount.Mount, err error) {
	var sn storage.Snapshot

	lookup := func(tctx context.Context) error {
		var gErr error
		if sn, gErr = storage.GetSnapshot(tctx, key); gErr != nil {
			return fmt.Errorf("failed to get snapshot mount: %w", gErr)
		}
		return nil
	}
	if err = s.ms.WithTransaction(ctx, false, lookup); err != nil {
		return nil, err
	}

	return s.mounts(ctx, sn, key)
}
// Commit commits the active snapshot `key` as the committed snapshot `name`.
//
// Only snapshots that hold an unpacked layer (i.e. whose key contains
// snapshots.UnpackKeyPrefix) can be committed; committing a scratch snapshot isn't
// supported and returns an error.
func (s *blockCIMSnapshotter) Commit(ctx context.Context, name, key string, opts ...snapshots.Opt) error {
	if !strings.Contains(key, snapshots.UnpackKeyPrefix) {
		return fmt.Errorf("committing a scratch snapshot to read-only cim layer isn't supported yet")
	}

	return s.ms.WithTransaction(ctx, true, func(ctx context.Context) error {
		// Compute the usage using the already open transaction. Calling s.Usage here
		// would start a second (read-only) transaction on the same goroutine while
		// the read-write transaction is still open.
		id, info, usage, err := storage.GetInfo(ctx, key)
		if err != nil {
			return fmt.Errorf("failed to get usage during commit: %w", err)
		}
		if info.Kind == snapshots.KindActive {
			// Active snapshots are measured on disk.
			du, err := fs.DiskUsage(ctx, s.getSnapshotDir(id))
			if err != nil {
				return fmt.Errorf("failed to get usage during commit: %w", err)
			}
			usage = snapshots.Usage(du)
		}

		if _, err := storage.CommitActive(ctx, key, name, usage, opts...); err != nil {
			return fmt.Errorf("failed to commit snapshot: %w", err)
		}
		return nil
	})
}
// Remove abandons the transaction identified by key. All resources
// associated with the key will be removed.
//
// A failure of the pre-removal step is reported as errdefs.ErrFailedPrecondition.
// Deleting the snapshot directory afterwards is best effort: a failure is only
// logged and Remove still returns nil.
func (s *blockCIMSnapshotter) Remove(ctx context.Context, key string) error {
	renamedID, err := s.preRemove(ctx, key)
	if err != nil {
		// wrap as ErrFailedPrecondition so that cleanup of other snapshots can continue
		return fmt.Errorf("%w: %s", errdefs.ErrFailedPrecondition, err)
	}

	// It is either a scratch or a layer snapshot. For a scratch the VHD must have
	// been unmounted, and for a layer the CIM must have been unmounted, so in both
	// cases the directory can be deleted.
	snDir := s.getSnapshotDir(renamedID)
	if rmErr := os.RemoveAll(snDir); rmErr != nil {
		entry := log.G(ctx).WithError(rmErr).WithField("renamedID", renamedID)
		entry.Warnf("failed to remove snapshot")
	}
	return nil
}
// createSnapshot creates a new snapshot of the given kind identified by `key` on top
// of `parent` and returns its mounts.
//
// The snapshot is recorded in the metadata store and its directory is created inside
// a single read-write transaction. Snapshots whose key contains
// snapshots.UnpackKeyPrefix only get an empty directory (the layer is extracted into
// it later). All other snapshots are scratch snapshots: they must have at least one
// parent and get a sandbox.vhdx (plus a UVM scratch layer if requested via the
// `uvmScratchLabel` label). For scratch snapshots with more than one parent layer, a
// merged CIM of the parent layers is prepared after the transaction completes.
func (s *blockCIMSnapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, key, parent string, opts []snapshots.Opt) (_ []mount.Mount, err error) {
	var newSnapshot storage.Snapshot

	err = s.ms.WithTransaction(ctx, true, func(ctx context.Context) (retErr error) {
		newSnapshot, err = storage.CreateSnapshot(ctx, kind, key, parent, opts...)
		if err != nil {
			return fmt.Errorf("failed to create snapshot: %w", err)
		}

		log.G(ctx).WithFields(logrus.Fields{
			"key":    key,
			"parent": parent,
			"ID":     newSnapshot.ID,
		}).Debugf("createSnapshot")

		// Create the new snapshot dir
		snDir := s.getSnapshotDir(newSnapshot.ID)
		if err = os.MkdirAll(snDir, 0700); err != nil {
			return fmt.Errorf("failed to create snapshot dir %s: %w", snDir, err)
		}
		// Don't leave a half-populated snapshot directory behind on failure.
		defer func() {
			if retErr != nil {
				os.RemoveAll(snDir)
			}
		}()

		if strings.Contains(key, snapshots.UnpackKeyPrefix) {
			// IO/disk space optimization: Do nothing
			//
			// We only need one sandbox.vhdx for the container. Skip making one for this
			// snapshot if this isn't the snapshot that just houses the final sandbox.vhdx
			// that will be mounted as the containers scratch. Currently the key for a snapshot
			// where a layer will be extracted to will have the string `extract-` in it.
			return nil
		}

		if len(newSnapshot.ParentIDs) == 0 {
			return fmt.Errorf("scratch snapshot without any parents isn't supported")
		}

		parentLayerPaths := s.parentIDsToParentPaths(newSnapshot.ParentIDs)

		// Apply the options to a scratch Info only to read the requested labels.
		var snapshotInfo snapshots.Info
		for _, o := range opts {
			o(&snapshotInfo)
		}

		sizeInBytes, err := getRequestedScratchSize(ctx, snapshotInfo)
		if err != nil {
			return err
		}

		// This has to be run first to avoid clashing with the containers sandbox.vhdx.
		if _, makeUVMScratch := snapshotInfo.Labels[uvmScratchLabel]; makeUVMScratch {
			if err = s.createUVMScratchLayer(ctx, snDir, parentLayerPaths); err != nil {
				return fmt.Errorf("failed to make UVM's scratch layer: %w", err)
			}
		}

		if err = s.createScratchLayer(ctx, snDir, sizeInBytes); err != nil {
			return fmt.Errorf("failed to create scratch layer: %w", err)
		}
		return nil
	})
	if err != nil {
		return nil, err
	}

	if !strings.Contains(key, snapshots.UnpackKeyPrefix) && len(newSnapshot.ParentIDs) > 1 {
		// We are creating a scratch snapshot, that means we will start a
		// container with this scratch snapshot and its parent layers. So we
		// should merge the parent layer CIMs. That operation can take long time.
		// All concurrent requests to create the merge should first acquire a lock
		// for that operation. Each merge operation can be uniquely identified by
		// the last layer in that merge, which in case of the scratch snapshot is
		// a direct parent of that scratch snapshot.
		prepareMergedCIMLocked := func() error {
			// function created to limit the scope of locked code and ensure
			// that the lock is always released via defer
			//
			// Lock can fail (e.g. when ctx is cancelled while waiting). In that
			// case the lock is NOT held, so we must neither run the merge nor
			// unlock.
			if lockErr := s.mergeLock.Lock(ctx, parent); lockErr != nil {
				return fmt.Errorf("failed to acquire merge lock for parent %s: %w", parent, lockErr)
			}
			defer s.mergeLock.Unlock(parent)
			return s.prepareMergedCIM(ctx, newSnapshot.ParentIDs)
		}
		if err = prepareMergedCIMLocked(); err != nil {
			return nil, fmt.Errorf("failed to prepare merged CIM: %w", err)
		}
	}

	return s.mounts(ctx, newSnapshot, key)
}
// createScratchLayer creates the scratch VHD (`sandbox.vhdx`) inside `snDir`.
// A `sizeInBytes` of 0 means that no specific size was requested.
//
// With CimFS layers the scratch VHDs are fully empty (WCIFS layers have reparse
// points in scratch VHDs, hence those VHDs are unique per image), so a single
// template VHD can be copied for every snapshot:
//   - If the scratch is formatted with NTFS, the template is copied and, if a size
//     was requested, expanded to that size.
//   - If the scratch is unformatted, the template is copied only when no size was
//     requested. Otherwise a new VHD of the requested size is created, because
//     ExpandSandboxSize expects a formatted VHD. The requested size must not be
//     smaller than minimumUnformattedScratchSizeInBytes.
func (s *blockCIMSnapshotter) createScratchLayer(ctx context.Context, snDir string, sizeInBytes uint64) error {
	var (
		templatePath = filepath.Join(s.root, templateVHDName)
		scratchPath  = filepath.Join(snDir, "sandbox.vhdx")
	)

	if !s.config.UnformattedScratch {
		// Formatted scratch: copy the template and grow it if needed.
		if err := copyScratchDisk(templatePath, scratchPath); err != nil {
			return err
		}
		if sizeInBytes == 0 {
			return nil
		}
		if err := hcsshim.ExpandSandboxSize(s.info, filepath.Base(snDir), sizeInBytes); err != nil {
			return fmt.Errorf("failed to expand sandbox vhdx size to %d bytes: %w", sizeInBytes, err)
		}
		return nil
	}

	// Unformatted scratch.
	switch {
	case sizeInBytes == 0:
		return copyScratchDisk(templatePath, scratchPath)
	case sizeInBytes < minimumUnformattedScratchSizeInBytes:
		return fmt.Errorf("scratch size MUST be at least %d GB, requested size: %d bytes", minimumUnformattedScratchSizeInBytes/(1024*1024*1024), sizeInBytes)
	}
	if err := createScratchVHD(ctx, scratchPath, WithSize(sizeInBytes)); err != nil {
		return fmt.Errorf("failed to create scratch VHD of requested size: %w", err)
	}
	return nil
}
// mounts builds the single "BlockCIM" mount that describes the snapshot `sn`.
// `key` is only used for logging.
//
// The mount options carry, in order: the JSON encoded list of parent layer CIM
// paths, the block CIM type, the optional layer integrity / VHD footer flags taken
// from the snapshotter config and, for scratch snapshots with more than one parent,
// the path of the merged CIM. The mount source is the snapshot directory for
// scratch snapshots and the layer CIM path for all other snapshots.
//
// An error is returned if any parent snapshot isn't committed, if the parent
// information can't be read or if the snapshot directory can't be inspected.
func (s *blockCIMSnapshotter) mounts(ctx context.Context, sn storage.Snapshot, key string) ([]mount.Mount, error) {
	const blockType = "file"

	var parentLayers []*cimfs.BlockCIM
	err := s.ms.WithTransaction(ctx, false, func(tCtx context.Context) error {
		parentSnInfo, tErr := s.snapshotInfoFromID(tCtx, sn.ParentIDs)
		if tErr != nil {
			return fmt.Errorf("failed to get info for snapshots: %w", tErr)
		}
		// parentSnInfo is in the same order as sn.ParentIDs.
		for i, info := range parentSnInfo {
			sCIM, tErr := s.getSnapshotBlockCIM(tCtx, sn.ParentIDs[i], info)
			if tErr != nil {
				return tErr
			}
			parentLayers = append(parentLayers, sCIM)
		}
		return nil
	})
	if err != nil {
		return nil, fmt.Errorf("failed to get parent snapshot CIMs: %w", err)
	}

	// Keep this a non-nil slice so that "no parents" is encoded as `[]` and not `null`.
	parentLayerPaths := make([]string, 0, len(parentLayers))
	for _, pl := range parentLayers {
		parentLayerPaths = append(parentLayerPaths, filepath.Join(pl.BlockPath, pl.CimName))
	}
	parentLayersJSON, err := json.Marshal(parentLayerPaths)
	if err != nil {
		return nil, fmt.Errorf("failed to encode parent layer paths: %w", err)
	}

	m := mount.Mount{Type: "BlockCIM"}
	m.Options = append(m.Options,
		mount.ParentLayerCimPathsFlag+string(parentLayersJSON),
		mount.BlockCIMTypeFlag+blockType,
	)

	// Add configuration options as mount options
	if s.config.EnableLayerIntegrity {
		m.Options = append(m.Options, mount.EnableLayerIntegrityFlag)
	}
	if s.config.AppendVHDFooter {
		m.Options = append(m.Options, mount.AppendVHDFooterFlag)
	}

	isScratch, err := s.isScratchSnapshot(sn.ID)
	if err != nil {
		return nil, err
	}
	if isScratch {
		if len(sn.ParentIDs) > 1 {
			// The merged CIM lives in the directory of the immediate parent layer.
			mergeBlockCIMPath := filepath.Join(s.getSnapshotDir(sn.ParentIDs[0]), mergedBlockName, mergedCIMName)
			m.Options = append(m.Options, mount.MergedCIMPathFlag+mergeBlockCIMPath)
		}
		m.Source = s.getSnapshotDir(sn.ID)
	} else {
		m.Source = s.getLayerCIMPathFromCIMBlock(s.getSingleFileCIMBlockPath(sn.ID))
	}

	log.G(ctx).WithFields(logrus.Fields{
		"snapshot ID":   sn.ID,
		"snapshot name": key,
		"parent IDs":    sn.ParentIDs,
		"mount":         m,
	}).Debugf("snapshot mounts")

	return []mount.Mount{m}, nil
}
// Names of the files that the block CIM snapshotter stores inside snapshot directories.
const (
	// layerCIMName is the name of the CIM inside a layer's block.
	layerCIMName = "layer.cim"
	// layerBlockName is the name of the single-file block that holds a layer's CIM.
	layerBlockName = "layer.vhd"
	// mergedCIMName is the name of the CIM inside the merged block.
	mergedCIMName = "merged.cim"
	// mergedBlockName is the name of the single-file block that holds the merged CIM
	// of a layer and its parent layers.
	mergedBlockName = "merged.vhd"
)
// prepareMergedCIM creates a new merged block CIM from the given snapshots. The order of
// `snapshotIDs` is important. It is expected that the snapshot ID at the last index is
// the base layer and the snapshot ID at the 0th index is the immediate parent layer.
// The newly created merged CIM is stored in the same directory as that of the snapshot
// at the 0th index. If a merged CIM already exists in the 0th snapshot's directory,
// nothing is done.
//
// At least 2 snapshot IDs are required and all of the snapshots must be committed.
// If the merge fails, the merged block file is removed (best effort), so that a
// later call doesn't mistake a leftover file for a complete merged CIM.
func (s *blockCIMSnapshotter) prepareMergedCIM(ctx context.Context, snapshotIDs []string) (rErr error) {
	if len(snapshotIDs) < 2 {
		return fmt.Errorf("merging CIM requires at least 2 snapshots")
	}

	log.G(ctx).WithFields(logrus.Fields{
		"source snapshots": snapshotIDs,
	}).Debugf("preparing merged CIM")

	var (
		// directory in which the merged CIM is stored
		mergeDir          = s.getSnapshotDir(snapshotIDs[0])
		mergeBlockCIMPath = filepath.Join(mergeDir, mergedBlockName)
		snInfos           []snapshots.Info
	)

	// check if a merged CIM already exists
	if _, err := os.Stat(mergeBlockCIMPath); err == nil {
		log.G(ctx).Debugf("merged CIM already exists.")
		return nil
	}

	err := s.ms.WithTransaction(ctx, false, func(tctx context.Context) error {
		var terr error
		snInfos, terr = s.snapshotInfoFromID(tctx, snapshotIDs)
		if terr != nil {
			return fmt.Errorf("failed to get info for snapshots: %w", terr)
		}
		return nil
	})
	if err != nil {
		return err
	}

	// snInfos is in the same order as snapshotIDs.
	sourceCIMs := make([]*cimfs.BlockCIM, 0, len(snapshotIDs))
	for i, snID := range snapshotIDs {
		sCIM, err := s.getSnapshotBlockCIM(ctx, snID, snInfos[i])
		if err != nil {
			return err
		}
		sourceCIMs = append(sourceCIMs, sCIM)
	}

	mergedCIM := &cimfs.BlockCIM{
		Type:      cimfs.BlockCIMTypeSingleFile,
		BlockPath: mergeBlockCIMPath,
		CimName:   mergedCIMName,
	}

	// Build merge options based on snapshotter configuration
	var mergeOpts []cimlayer.BlockCIMLayerImportOpt
	if s.config.EnableLayerIntegrity {
		mergeOpts = append(mergeOpts, cimlayer.WithLayerIntegrity())
	}
	if s.config.AppendVHDFooter {
		mergeOpts = append(mergeOpts, cimlayer.WithVHDFooter())
	}

	if err = cimlayer.MergeBlockCIMLayersWithOpts(ctx, sourceCIMs, mergedCIM, mergeOpts...); err != nil {
		// The existence of the merged block file is what marks the merge as done
		// (see the check above), so make sure a failed merge doesn't leave one behind.
		if rmErr := os.Remove(mergeBlockCIMPath); rmErr != nil && !os.IsNotExist(rmErr) {
			log.G(ctx).WithError(rmErr).WithField("path", mergeBlockCIMPath).Warnf("failed to remove incomplete merged CIM")
		}
		return fmt.Errorf("failed to merge CIMs: %w", err)
	}

	log.G(ctx).WithFields(logrus.Fields{
		"merged CIM": mergedCIM,
	}).Debugf("merged CIM created")
	return nil
}
// isScratchSnapshot reports whether the snapshot with ID `snID` is a scratch
// snapshot, i.e. whether its directory contains a `sandbox.vhdx`. An error is
// returned only if the check itself fails for a reason other than the file not
// existing.
func (s *blockCIMSnapshotter) isScratchSnapshot(snID string) (bool, error) {
	_, statErr := os.Stat(filepath.Join(s.getSnapshotDir(snID), "sandbox.vhdx"))
	switch {
	case statErr == nil:
		return true, nil
	case os.IsNotExist(statErr):
		return false, nil
	default:
		return false, statErr
	}
}