Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmd/vm/commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,8 @@ func addCloneFlags(cmd *cobra.Command) {
cmd.Flags().String("bridge", "", "use TAP-on-bridge instead of CNI (value is bridge device, e.g. cni0)")
cmd.Flags().Bool("no-direct-io", false, "disable O_DIRECT on writable disks (inherit from snapshot if not set)")
cmd.Flags().Bool("on-demand", false, "use UFFD on-demand memory loading for faster clone (CH only; snapshot file must remain on disk)")
cmd.Flags().Bool("cold", false, "clone the disk only and cold-boot the guest, discarding saved memory/vCPU state (CH only; survives hypervisor-version/CPUID changes that block warm restore)")
cmd.Flags().String("vm-id", "", "reuse this VM ID instead of generating one (preserves identity for controller re-adoption after a cold migration)")
cmd.Flags().Bool("pull", false, "auto-pull base image if not found locally (for cross-node clone)")
cmd.Flags().String("from-dir", "", "clone from a snapshot directory (must contain snapshot.json) instead of the local snapshot DB; mutually exclusive with positional SNAPSHOT")
}
13 changes: 13 additions & 0 deletions cmd/vm/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -299,13 +299,26 @@ func (h Handler) prepareClone(ctx context.Context, cmd *cobra.Command, conf *con
return nil, "", nil, types.NetSetup{}, err
}
vmID := utils.GenerateID()
// Reuse the source VM's ID to preserve identity across a cold migration, so
// the managing controller (e.g. vk-cocoon) re-adopts the clone by its stored
// VMID instead of treating it as an orphan.
if id, _ := cmd.Flags().GetString("vm-id"); id != "" {
vmID = id
}
if vmCfg.Name == "" {
vmCfg.Name = "cocoon-clone-" + network.VMIDPrefix(vmID)
}
if err = vmCfg.Validate(); err != nil {
return nil, "", nil, types.NetSetup{}, err
}

if cold, _ := cmd.Flags().GetBool("cold"); cold {
if conf.UseFirecracker {
return nil, "", nil, types.NetSetup{}, fmt.Errorf("--cold clone is Cloud Hypervisor only")
}
vmCfg.ColdBoot = true
}

if pull, _ := cmd.Flags().GetBool("pull"); pull && vmCfg.Image != "" && vmCfg.ImageType != "" {
backends, initErr := cmdcore.InitImageBackends(ctx, conf)
if initErr != nil {
Expand Down
55 changes: 53 additions & 2 deletions hypervisor/cloudhypervisor/clone.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,6 @@ func (ch *CloudHypervisor) cloneAfterExtract(ctx context.Context, vmID string, v
return nil, fmt.Errorf("verify base files: %w", err)
}

stateReplacements := buildStateReplacements(chCfg, storageConfigs)

storageConfigs, err = ch.ensureCloneCidata(vmID, vmCfg, networkConfigs, storageConfigs, directBoot)
if err != nil {
return nil, err
Expand All @@ -75,6 +73,11 @@ func (ch *CloudHypervisor) cloneAfterExtract(ctx context.Context, vmID string, v
return nil, fmt.Errorf("validate post-cidata storage: %w", vErr)
}

if vmCfg.ColdBoot {
return ch.coldBootClone(ctx, vmID, vmCfg, net, runDir, logDir, now, bootCfg, storageConfigs, sourceSnapshotID)
}

stateReplacements := buildStateReplacements(chCfg, storageConfigs)
patchStorageConfigs := restorePatchStorageConfigs(storageConfigs, directBoot, vmCfg.Windows, hadCidataInSnapshot)

consoleSock := hypervisor.ConsoleSockPath(runDir)
Expand Down Expand Up @@ -141,6 +144,54 @@ func (ch *CloudHypervisor) cloneAfterExtract(ctx context.Context, vmID string, v
return info, nil
}

// coldBootClone starts the clone as a brand-new guest on top of the cloned
// disk and ignores the snapshot's saved CH state (state.json + memory ranges).
// Because the guest re-evaluates CPUID while booting, this path keeps working
// across hypervisor-version / CPUID changes that would make a warm vm.restore
// fail; the price is that all in-memory runtime state is lost. The disk (COW
// overlay) is a standard guest-owned image and stays valid across versions.
//
// For direct-boot guests bootCfg.Cmdline is overwritten in place before the
// launch. On success the returned VM is the one embedded in the launched
// record, stamped with now as its created/updated/started time.
func (ch *CloudHypervisor) coldBootClone(ctx context.Context, vmID string, vmCfg *types.VMConfig, net types.NetSetup, runDir, logDir string, now time.Time, bootCfg *types.BootConfig, storageConfigs []*types.StorageConfig, sourceSnapshotID string) (_ *types.VM, err error) {
	// A clone gets fresh NIC MACs and disk serials, which makes the kernel
	// cmdline captured at snapshot time stale; direct-boot guests therefore
	// need it regenerated.
	if isDirectBoot(bootCfg) {
		dnsServers, dnsErr := ch.conf.DNSServers()
		if dnsErr != nil {
			// NOTE(review): unlike a launch failure below, this path returns
			// without calling ch.MarkError — confirm that is intended.
			return nil, fmt.Errorf("parse DNS servers: %w", dnsErr)
		}
		bootCfg.Cmdline = buildCmdline(storageConfigs, net.NetworkConfigs, vmCfg.Name, dnsServers)
	}

	record := &hypervisor.VMRecord{
		VM: types.VM{
			ID:             vmID,
			Hypervisor:     typ,
			State:          types.VMStateRunning,
			Config:         *vmCfg,
			StorageConfigs: storageConfigs,
			NetSetup:       net,
			// The disk is already provisioned: FirstBooted keeps cidata from
			// being re-attached (see activeDisks) and makes this count as a
			// restart rather than a first boot.
			FirstBooted: true,
		},
		BootConfig: bootCfg,
		RunDir:     runDir,
		LogDir:     logDir,
	}

	sockPath := hypervisor.SocketPath(runDir)
	pid, err := ch.launchFresh(ctx, record, sockPath)
	if err != nil {
		ch.MarkError(ctx, vmID)
		return nil, fmt.Errorf("launch CH: %w", err)
	}

	vm := &record.VM
	vm.CreatedAt = now
	vm.UpdatedAt = now
	vm.StartedAt = &now
	if err = ch.FinalizeClone(ctx, vmID, vm, bootCfg, nil, sourceSnapshotID); err != nil {
		// The process is already running; tear it down so it is not left behind.
		ch.AbortLaunch(ctx, pid, sockPath, runDir, runtimeFiles)
		return nil, fmt.Errorf("finalize VM record: %w", err)
	}

	log.WithFunc("cloudhypervisor.coldBootClone").Infof(ctx, "VM %s cold-cloned from snapshot (disk only)", vmID)
	return vm, nil
}

func (ch *CloudHypervisor) restoreAndResumeClone(
ctx context.Context,
pid int,
Expand Down
14 changes: 10 additions & 4 deletions hypervisor/cloudhypervisor/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,20 @@ func (ch *CloudHypervisor) startOne(ctx context.Context, id string) (bool, error
return ch.StartSequence(ctx, id, hypervisor.StartSpec{
RuntimeFiles: runtimeFiles,
Launch: func(ctx context.Context, rec *hypervisor.VMRecord, sockPath string) (int, error) {
vmCfg := buildVMConfig(ctx, rec, hypervisor.ConsoleSockPath(rec.RunDir))
args := buildCLIArgs(vmCfg, sockPath)
ch.saveCmdline(ctx, rec, args)
return ch.launchProcess(ctx, rec, sockPath, args, rec.ResolvedNetnsPath())
return ch.launchFresh(ctx, rec, sockPath)
},
})
}

// launchFresh boots a VM from disk using only what its record describes. It
// derives the CH config from the record, turns that into CLI arguments,
// persists those arguments via saveCmdline, and then starts the process in
// the record's resolved network namespace. Both a regular start and a cold
// clone go through here, since neither has saved CH state to restore.
//
// It returns whatever launchProcess returns (an int and an error).
func (ch *CloudHypervisor) launchFresh(ctx context.Context, rec *hypervisor.VMRecord, sockPath string) (int, error) {
	consoleSock := hypervisor.ConsoleSockPath(rec.RunDir)
	chCfg := buildVMConfig(ctx, rec, consoleSock)
	cliArgs := buildCLIArgs(chCfg, sockPath)

	// Record the exact arguments before the process is started.
	ch.saveCmdline(ctx, rec, cliArgs)

	return ch.launchProcess(ctx, rec, sockPath, cliArgs, rec.ResolvedNetnsPath())
}

func (ch *CloudHypervisor) launchProcess(ctx context.Context, rec *hypervisor.VMRecord, socketPath string, args []string, netnsPath string) (int, error) {
processLog := ch.LogFilePath(rec.LogDir)
logFile, err := os.Create(processLog) //nolint:gosec
Expand Down
1 change: 1 addition & 0 deletions types/vm.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ type VMConfig struct {
Name string `json:"name"`

OnDemand bool `json:"-"` // use UFFD on-demand memory restore (CH only); transient, not persisted
ColdBoot bool `json:"-"` // clone disk only and cold-boot, discarding saved memory/vCPU state; transient
User string `json:"-"`
Password string `json:"-"`
DataDisks []DataDiskSpec `json:"-"` // populated from --data-disk; consumed by Create
Expand Down
Loading