diff --git a/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/driver/OntapPrimaryDatastoreDriver.java b/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/driver/OntapPrimaryDatastoreDriver.java index 04996b74d2a5..b3ced8dc9900 100644 --- a/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/driver/OntapPrimaryDatastoreDriver.java +++ b/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/driver/OntapPrimaryDatastoreDriver.java @@ -32,6 +32,8 @@ import com.cloud.storage.VolumeDetailVO; import com.cloud.storage.VolumeVO; import com.cloud.storage.ScopeType; +import com.cloud.storage.SnapshotVO; +import com.cloud.storage.dao.SnapshotDao; import com.cloud.storage.dao.SnapshotDetailsDao; import com.cloud.storage.dao.SnapshotDetailsVO; import com.cloud.storage.dao.VolumeDao; @@ -67,7 +69,6 @@ import org.apache.cloudstack.storage.service.model.ProtocolType; import org.apache.cloudstack.storage.to.SnapshotObjectTO; import org.apache.cloudstack.storage.utils.OntapStorageUtils; -import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.Nullable; @@ -91,6 +92,7 @@ public class OntapPrimaryDatastoreDriver implements PrimaryDataStoreDriver { @Inject private VolumeDao volumeDao; @Inject private VolumeDetailsDao volumeDetailsDao; @Inject private SnapshotDetailsDao snapshotDetailsDao; + @Inject private SnapshotDao snapshotDao; @Override public Map getCapabilities() { @@ -98,6 +100,7 @@ public Map getCapabilities() { Map mapCapabilities = new HashMap<>(); mapCapabilities.put(DataStoreCapabilities.STORAGE_SYSTEM_SNAPSHOT.toString(), Boolean.TRUE.toString()); mapCapabilities.put(DataStoreCapabilities.CAN_CREATE_VOLUME_FROM_SNAPSHOT.toString(), Boolean.TRUE.toString()); + mapCapabilities.put(DataStoreCapabilities.CAN_REVERT_VOLUME_TO_SNAPSHOT.toString(), Boolean.TRUE.toString()); return mapCapabilities; } @@ -210,8 +213,14 @@ private CloudStackVolume createCloudStackVolume(StoragePoolVO storagePool, Volum /** * Deletes a volume or snapshot from the ONTAP storage system. * - *

For volumes, deletes the backend storage object (LUN for iSCSI, no-op for NFS). - * For snapshots, deletes the FlexVolume snapshot from ONTAP that was created by takeSnapshot.

+ *

For volumes, deletes the backend storage object (LUN for iSCSI, file for NFS) via + * {@link StorageStrategy#deleteCloudStackVolume}.

+ * + *

For volume snapshots, this driver is invoked by the standard CloudStack delete chain + * ({@code StorageSystemSnapshotStrategy} → {@code SnapshotServiceImpl.deleteSnapshot} → + * {@code deleteAsync}). It reads ONTAP metadata from {@code snapshot_details} and delegates + * the actual FlexVol snapshot delete to {@link StorageStrategy} (NFS or iSCSI implementation). + * ONTAP REST/delete-job logic must not live here — keep it in the storage-strategy layer.

*/ @Override public void deleteAsync(DataStore store, DataObject data, AsyncCompletionCallback callback) { @@ -237,8 +246,9 @@ public void deleteAsync(DataStore store, DataObject data, AsyncCompletionCallbac commandResult.setResult(null); commandResult.setSuccess(true); } else if (data.getType() == DataObjectType.SNAPSHOT) { - // Delete the ONTAP FlexVolume snapshot that was created by takeSnapshot - deleteOntapSnapshot((SnapshotInfo) data, commandResult); + logger.info("deleteAsync: volume-snapshot delete for CloudStack snapshot [{}] on primary pool [{}] — " + + "delegating ONTAP FlexVol cleanup to StorageStrategy", data.getId(), store.getId()); + deleteCloudStackVolumeSnapshot((SnapshotInfo) data, commandResult); } else { throw new CloudRuntimeException("Unsupported data object type: " + data.getType()); } @@ -252,79 +262,107 @@ public void deleteAsync(DataStore store, DataObject data, AsyncCompletionCallbac } /** - * Deletes an ONTAP FlexVolume snapshot. + * Orchestrates CloudStack volume-snapshot delete on ONTAP. * - *

Retrieves the snapshot details stored during takeSnapshot and calls the ONTAP - * REST API to delete the FlexVolume snapshot.

+ *

This method is intentionally thin: it resolves identifiers persisted during + * {@link #takeSnapshot} into {@code snapshot_details} and delegates to the protocol + * {@link StorageStrategy} selected from pool details (NFS → {@code UnifiedNASStrategy}, + * iSCSI → {@code UnifiedSANStrategy}). Both protocols share the same FlexVol-level + * snapshot delete REST API.

* - * @param snapshotInfo The CloudStack snapshot to delete - * @param commandResult Result object to populate with success/failure + *

Required {@code snapshot_details} keys (see {@link OntapStorageConstants}):

+ *
    + *
  • {@code base_ontap_fv_id} — FlexVol UUID
  • + *
  • {@code ontap_snap_id} — ONTAP snapshot UUID
  • + *
  • {@code ontap_snap_name} — snapshot name (logging)
  • + *
  • {@code primary_pool_id} — pool used to obtain credentials/protocol strategy
  • + *
*/ - private void deleteOntapSnapshot(SnapshotInfo snapshotInfo, CommandResult commandResult) { + private void deleteCloudStackVolumeSnapshot(SnapshotInfo snapshotInfo, CommandResult commandResult) { long snapshotId = snapshotInfo.getId(); - logger.info("deleteOntapSnapshot: Deleting ONTAP FlexVolume snapshot for CloudStack snapshot [{}]", snapshotId); + logger.info("deleteCloudStackVolumeSnapshot: starting ONTAP delete for CloudStack volume snapshot [{}]", snapshotId); try { - // Retrieve snapshot details stored during takeSnapshot String flexVolUuid = getSnapshotDetail(snapshotId, OntapStorageConstants.BASE_ONTAP_FV_ID); String ontapSnapshotUuid = getSnapshotDetail(snapshotId, OntapStorageConstants.ONTAP_SNAP_ID); String snapshotName = getSnapshotDetail(snapshotId, OntapStorageConstants.ONTAP_SNAP_NAME); String poolIdStr = getSnapshotDetail(snapshotId, OntapStorageConstants.PRIMARY_POOL_ID); if (flexVolUuid == null || ontapSnapshotUuid == null) { - logger.warn("deleteOntapSnapshot: Missing ONTAP snapshot details for snapshot [{}]. " + - "flexVolUuid={}, ontapSnapshotUuid={}. Snapshot may have been created by a different method or already deleted.", + logger.warn("deleteCloudStackVolumeSnapshot: missing ONTAP identity for snapshot [{}] " + + "(flexVolUuid={}, ontapSnapshotUuid={}). Cannot call ONTAP delete; " + + "treating as no-op — verify snapshot_details were written during takeSnapshot", snapshotId, flexVolUuid, ontapSnapshotUuid); - // Consider this a success since there's nothing to delete on ONTAP commandResult.setSuccess(true); commandResult.setResult(null); return; } - long poolId = Long.parseLong(poolIdStr); + long poolId = resolveSnapshotPoolId(poolIdStr, snapshotId); Map poolDetails = storagePoolDetailsDao.listDetailsKeyPairs(poolId); - + String protocol = poolDetails.get(OntapStorageConstants.PROTOCOL); StorageStrategy storageStrategy = OntapStorageUtils.getStrategyByStoragePoolDetails(poolDetails); - SnapshotFeignClient snapshotClient = storageStrategy.getSnapshotFeignClient(); - String authHeader = storageStrategy.getAuthHeader(); - logger.info("deleteOntapSnapshot: Deleting ONTAP snapshot [{}] (uuid={}) from FlexVol [{}]", - snapshotName, ontapSnapshotUuid, flexVolUuid); + logger.info("deleteCloudStackVolumeSnapshot: snapshot [{}] — protocol [{}], pool [{}], " + + "flexVol [{}], ontapSnapshot [{}] (name [{}])", + snapshotId, protocol, poolId, flexVolUuid, ontapSnapshotUuid, snapshotName); - // Call ONTAP REST API to delete the snapshot - JobResponse jobResponse = snapshotClient.deleteSnapshot(authHeader, flexVolUuid, ontapSnapshotUuid); - - if (jobResponse != null && jobResponse.getJob() != null) { - // Poll for job completion - Boolean jobSucceeded = storageStrategy.jobPollForSuccess(jobResponse.getJob().getUuid(), 30, 2000); - if (!jobSucceeded) { - throw new CloudRuntimeException("Delete job failed for snapshot [" + - snapshotName + "] on FlexVol [" + flexVolUuid + "]"); - } - } - - logger.info("deleteOntapSnapshot: Successfully deleted ONTAP snapshot [{}] (uuid={}) for CloudStack snapshot [{}]", - snapshotName, ontapSnapshotUuid, snapshotId); + storageStrategy.deleteFlexVolSnapshotForCloudStackVolume(flexVolUuid, ontapSnapshotUuid, snapshotName); + logger.info("deleteCloudStackVolumeSnapshot: completed ONTAP delete for CloudStack volume snapshot [{}]", snapshotId); commandResult.setSuccess(true); commandResult.setResult(null); - } catch (Exception e) { - // Check if the error indicates snapshot doesn't exist (already deleted) - String errorMsg = e.getMessage(); - if (errorMsg != null && (errorMsg.contains("404") || errorMsg.contains("not found") || - errorMsg.contains("does not exist"))) { - logger.warn("deleteOntapSnapshot: ONTAP snapshot for CloudStack snapshot [{}] not found, " + - "may have been already deleted. Treating as success.", snapshotId); + if (isSnapshotNotFoundError(e)) { + logger.warn("deleteCloudStackVolumeSnapshot: ONTAP snapshot for CloudStack snapshot [{}] " + + "already absent (idempotent success): {}", snapshotId, e.getMessage()); commandResult.setSuccess(true); commandResult.setResult(null); - } else { - logger.error("deleteOntapSnapshot: Failed to delete ONTAP snapshot for CloudStack snapshot [{}]: {}", - snapshotId, e.getMessage(), e); - commandResult.setSuccess(false); - commandResult.setResult(e.getMessage()); + return; + } + logger.error("deleteCloudStackVolumeSnapshot: ONTAP delete failed for CloudStack snapshot [{}]: {}", + snapshotId, e.getMessage(), e); + commandResult.setSuccess(false); + commandResult.setResult(e.getMessage()); + } + } + + /** + * Returns true when the exception indicates the ONTAP snapshot was already removed. + * Delete is idempotent: a missing backend snapshot is treated as success. + */ + private boolean isSnapshotNotFoundError(Throwable error) { + if (error == null) { + return false; + } + String message = error.getMessage(); + if (message != null) { + String lower = message.toLowerCase(); + if (lower.contains("404") || lower.contains("not found") || lower.contains("does not exist") + || lower.contains("entry doesn't exist")) { + return true; } } + return isSnapshotNotFoundError(error.getCause()); + } + + private long resolveSnapshotPoolId(String poolIdStr, long snapshotId) { + if (poolIdStr != null && !poolIdStr.isEmpty()) { + return Long.parseLong(poolIdStr); + } + SnapshotVO snapshotVO = snapshotDao.findById(snapshotId); + if (snapshotVO == null) { + throw new CloudRuntimeException("Cannot resolve storage pool for snapshot [" + snapshotId + "]"); + } + VolumeVO volumeVO = volumeDao.findByIdIncludingRemoved(snapshotVO.getVolumeId()); + if (volumeVO == null) { + throw new CloudRuntimeException("Cannot resolve storage pool for snapshot [" + snapshotId + "]"); + } + Long poolId = volumeVO.getPoolId() != null ? volumeVO.getPoolId() : volumeVO.getLastPoolId(); + if (poolId == null || poolId <= 0) { + throw new CloudRuntimeException("Cannot resolve storage pool for snapshot [" + snapshotId + "]"); + } + return poolId; } @Override @@ -670,8 +708,8 @@ public void takeSnapshot(SnapshotInfo snapshot, AsyncCompletionCallback-} + * Builds an ONTAP-safe snapshot name from the CloudStack UI name with uniqueness suffix. */ - private String buildSnapshotName(String volumeName, String snapshotUuid) { - String name = volumeName + "-" + snapshotUuid; - int maxLength = OntapStorageConstants.MAX_SNAPSHOT_NAME_LENGTH; - int trimRequired = name.length() - maxLength; - - if (trimRequired > 0) { - name = StringUtils.left(volumeName, volumeName.length() - trimRequired) + "-" + snapshotUuid; - } - return name; + private String buildSnapshotName(String cloudStackSnapshotName, long snapshotId) { + return OntapStorageUtils.buildOntapSnapshotName(cloudStackSnapshotName, "cs" + snapshotId); } /** * Persists snapshot metadata in snapshot_details table. * + * Persists ONTAP snapshot metadata in {@code snapshot_details} for revert and delete. + * + *

Volume-snapshot delete reads {@code base_ontap_fv_id} and {@code ontap_snap_id} here + * during {@link #deleteCloudStackVolumeSnapshot}; missing rows prevent ONTAP cleanup.

+ * * @param csSnapshotId CloudStack snapshot ID * @param csVolumeId Source CloudStack volume ID * @param flexVolUuid ONTAP FlexVolume UUID diff --git a/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/feign/client/SnapshotFeignClient.java b/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/feign/client/SnapshotFeignClient.java index 2f0e050d6f55..d9566b422e38 100644 --- a/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/feign/client/SnapshotFeignClient.java +++ b/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/feign/client/SnapshotFeignClient.java @@ -181,4 +181,96 @@ JobResponse restoreFileFromSnapshot(@Param("authHeader") String authHeader, @Headers({"Authorization: {authHeader}", "Content-Type: application/json"}) JobResponse restoreFileFromSnapshotCli(@Param("authHeader") String authHeader, CliSnapshotRestoreRequest request); + + /** + * Creates a consistency group. + * + *

ONTAP REST: {@code POST /api/application/consistency-groups}

+ * + * @param authHeader Basic auth header + * @param request consistency group create request body + * @return JobResponse containing the async job reference + */ + @RequestLine("POST /api/application/consistency-groups") + @Headers({"Authorization: {authHeader}", "Content-Type: application/json"}) + JobResponse createConsistencyGroup(@Param("authHeader") String authHeader, + Map request); + + /** + * Lists consistency groups. + * + *

ONTAP REST: {@code GET /api/application/consistency-groups}

+ * + * @param authHeader Basic auth header + * @param queryParams Optional query parameters + * @return Paginated consistency group records + */ + @RequestLine("GET /api/application/consistency-groups") + @Headers({"Authorization: {authHeader}"}) + OntapResponse> getConsistencyGroups(@Param("authHeader") String authHeader, + @QueryMap Map queryParams); + + /** + * Creates (starts) a consistency group snapshot. + * + *

ONTAP REST: {@code POST /api/application/consistency-groups/{cgUuid}/snapshots}

+ * + * @param authHeader Basic auth header + * @param cgUuid consistency group UUID + * @param request snapshot start request body + * @return JobResponse containing the async job reference + */ + @RequestLine("POST /api/application/consistency-groups/{cgUuid}/snapshots") + @Headers({"Authorization: {authHeader}", "Content-Type: application/json"}) + JobResponse createConsistencyGroupSnapshot(@Param("authHeader") String authHeader, + @Param("cgUuid") String cgUuid, + Map request); + + /** + * Lists snapshots for a consistency group. + * + *

ONTAP REST: {@code GET /api/application/consistency-groups/{cgUuid}/snapshots}

+ * + * @param authHeader Basic auth header + * @param cgUuid consistency group UUID + * @param queryParams Optional query parameters + * @return Paginated consistency group snapshot records + */ + @RequestLine("GET /api/application/consistency-groups/{cgUuid}/snapshots") + @Headers({"Authorization: {authHeader}"}) + OntapResponse> getConsistencyGroupSnapshots(@Param("authHeader") String authHeader, + @Param("cgUuid") String cgUuid, + @QueryMap Map queryParams); + + /** + * Commits a started consistency group snapshot. + * + *

ONTAP REST: {@code PATCH /api/application/consistency-groups/{cgUuid}/snapshots/{snapshotUuid}}

+ * + * @param authHeader Basic auth header + * @param cgUuid consistency group UUID + * @param snapshotUuid consistency group snapshot UUID + * @param request commit request body + * @return JobResponse containing the async job reference + */ + @RequestLine("PATCH /api/application/consistency-groups/{cgUuid}/snapshots/{snapshotUuid}") + @Headers({"Authorization: {authHeader}", "Content-Type: application/json"}) + JobResponse commitConsistencyGroupSnapshot(@Param("authHeader") String authHeader, + @Param("cgUuid") String cgUuid, + @Param("snapshotUuid") String snapshotUuid, + Map request); + + /** + * Deletes a consistency group. + * + *

ONTAP REST: {@code DELETE /api/application/consistency-groups/{cgUuid}}

+ * + * @param authHeader Basic auth header + * @param cgUuid consistency group UUID + * @return JobResponse containing the async job reference + */ + @RequestLine("DELETE /api/application/consistency-groups/{cgUuid}") + @Headers({"Authorization: {authHeader}"}) + JobResponse deleteConsistencyGroup(@Param("authHeader") String authHeader, + @Param("cgUuid") String cgUuid); } diff --git a/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/service/StorageStrategy.java b/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/service/StorageStrategy.java index bd808a26d6f8..35a58f646499 100644 --- a/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/service/StorageStrategy.java +++ b/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/service/StorageStrategy.java @@ -527,15 +527,13 @@ public String getNetworkInterface() { abstract public CloudStackVolume getCloudStackVolume(Map cloudStackVolumeMap); /** - * Reverts a CloudStack volume to a snapshot using protocol-specific ONTAP APIs. + * Reverts a CloudStack volume to a snapshot using ONTAP CLI-based Single File Snap Restore (SFSR). * - *

This method encapsulates the snapshot revert behavior based on protocol:

- *
    - *
  • iSCSI/FC: Uses {@code POST /api/storage/luns/{lun.uuid}/restore} - * to restore LUN data from the FlexVolume snapshot.
  • - *
  • NFS: Uses {@code POST /api/storage/volumes/{vol.uuid}/snapshots/{snap.uuid}/files/{path}/restore} - * to restore a single file from the FlexVolume snapshot.
  • - *
+ *

Both NFS and iSCSI use the CLI passthrough API: + * {@code POST /api/private/cli/volume/snapshot/restore-file}

+ * + *

Callers should invoke {@link #executeCliSfsrRestore(JobResponse, String)} after this + * method returns to poll the async job when present, or treat a missing job as synchronous success.

* * @param snapshotName The ONTAP FlexVolume snapshot name * @param flexVolUuid The FlexVolume UUID containing the snapshot @@ -681,4 +679,88 @@ public Boolean jobPollForSuccess(String jobUUID, int maxRetries, int sleepTimeIn } return true; } + + /** + * Polls an ONTAP async job when the API response includes a job reference. + * + *

When no job is returned (common for CLI passthrough SFSR on synchronous completion), + * the operation is treated as successful after HTTP 2xx.

+ * + * @param response ONTAP job response (may be null or without a job) + * @param operationName label for logging and error messages + */ + public void pollJobIfPresent(JobResponse response, String operationName) { + pollJobIfPresent(response, operationName, + OntapStorageConstants.ONTAP_CG_JOB_MAX_RETRIES, + OntapStorageConstants.ONTAP_CG_JOB_POLL_INTERVAL_MS); + } + + /** + * Polls an ONTAP async job when present, using caller-supplied retry settings. + */ + public void pollJobIfPresent(JobResponse response, String operationName, + int maxRetries, int pollIntervalMs) { + if (response == null || response.getJob() == null || response.getJob().getUuid() == null) { + logger.debug("pollJobIfPresent: No async job returned for operation [{}], continuing without polling", + operationName); + return; + } + Boolean success = jobPollForSuccess(response.getJob().getUuid(), maxRetries, pollIntervalMs); + if (!Boolean.TRUE.equals(success)) { + throw new CloudRuntimeException("ONTAP operation failed: " + operationName); + } + } + + /** + * Completes CLI-based SFSR ({@code restore-file}) orchestration: poll job when returned, + * otherwise accept synchronous success. + */ + public void executeCliSfsrRestore(JobResponse response, String operationName) { + pollJobIfPresent(response, operationName, + OntapStorageConstants.ONTAP_SFSR_JOB_MAX_RETRIES, + OntapStorageConstants.ONTAP_SFSR_JOB_POLL_INTERVAL_MS); + } + + /** + * Deletes a FlexVolume snapshot on ONTAP for a CloudStack volume snapshot. + * + *

ONTAP volume snapshots (NFS and iSCSI) are FlexVol-level snapshots created by + * {@code POST /storage/volumes/{uuid}/snapshots} during take. Delete uses the matching + * REST {@code DELETE /storage/volumes/{uuid}/snapshots/{snapshot.uuid}} API regardless + * of whether the CloudStack volume is a file (NFS) or LUN (iSCSI). Protocol-specific + * subclasses ({@code UnifiedNASStrategy}, {@code UnifiedSANStrategy}) inherit this + * implementation; revert/restore remains protocol-specific via SFSR CLI.

+ * + *

Called from {@link org.apache.cloudstack.storage.driver.OntapPrimaryDatastoreDriver} + * during the standard delete chain — not from a separate ONTAP snapshot strategy.

+ * + * @param flexVolUuid ONTAP FlexVolume UUID + * @param snapshotUuid ONTAP FlexVolume snapshot UUID + * @param snapshotName ONTAP FlexVolume snapshot name (for logging) + */ + public void deleteFlexVolSnapshotForCloudStackVolume(String flexVolUuid, String snapshotUuid, String snapshotName) { + if (flexVolUuid == null || flexVolUuid.isEmpty() || snapshotUuid == null || snapshotUuid.isEmpty()) { + throw new CloudRuntimeException("FlexVolume UUID and snapshot UUID are required to delete an ONTAP snapshot"); + } + + logger.info("deleteFlexVolSnapshotForCloudStackVolume: issuing ONTAP REST delete for snapshot [{}] " + + "(uuid={}) on FlexVol [{}]", snapshotName, snapshotUuid, flexVolUuid); + + JobResponse jobResponse = snapshotFeignClient.deleteSnapshot(getAuthHeader(), flexVolUuid, snapshotUuid); + + if (jobResponse == null || jobResponse.getJob() == null) { + logger.debug("deleteFlexVolSnapshotForCloudStackVolume: no async job returned for snapshot [{}] " + + "(uuid={}); treating HTTP success as completion", snapshotName, snapshotUuid); + } else { + logger.debug("deleteFlexVolSnapshotForCloudStackVolume: polling ONTAP delete job [{}] for snapshot [{}]", + jobResponse.getJob().getUuid(), snapshotName); + } + + pollJobIfPresent(jobResponse, "delete FlexVol snapshot [" + snapshotName + "] uuid [" + snapshotUuid + "]", + OntapStorageConstants.ONTAP_SNAPSHOT_DELETE_JOB_MAX_RETRIES, + OntapStorageConstants.ONTAP_SNAPSHOT_DELETE_JOB_POLL_INTERVAL_MS); + + logger.info("deleteFlexVolSnapshotForCloudStackVolume: ONTAP FlexVol snapshot [{}] (uuid={}) removed from [{}]", + snapshotName, snapshotUuid, flexVolUuid); + } } diff --git a/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/utils/OntapStorageConstants.java b/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/utils/OntapStorageConstants.java index d0ea1783aa1d..bbd81f6ccc59 100644 --- a/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/utils/OntapStorageConstants.java +++ b/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/utils/OntapStorageConstants.java @@ -105,6 +105,13 @@ public class OntapStorageConstants { public static final String ONTAP_SNAP_SIZE = "ontap_snap_size"; public static final String FILE_PATH = "file_path"; public static final int MAX_SNAPSHOT_NAME_LENGTH = 64; + public static final String ONTAP_TEMP_CG_PREFIX = "cs-temp-cg-"; + public static final int ONTAP_CG_JOB_MAX_RETRIES = 60; + public static final int ONTAP_CG_JOB_POLL_INTERVAL_MS = 2000; + public static final int ONTAP_SFSR_JOB_MAX_RETRIES = 60; + public static final int ONTAP_SFSR_JOB_POLL_INTERVAL_MS = 2000; + public static final int ONTAP_SNAPSHOT_DELETE_JOB_MAX_RETRIES = 30; + public static final int ONTAP_SNAPSHOT_DELETE_JOB_POLL_INTERVAL_MS = 2000; /** vm_snapshot_details key for ONTAP FlexVolume-level VM snapshots. */ public static final String ONTAP_FLEXVOL_SNAPSHOT = "ontapFlexVolSnapshot"; diff --git a/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/utils/OntapStorageUtils.java b/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/utils/OntapStorageUtils.java index 596372edcf16..b05100a4886c 100644 --- a/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/utils/OntapStorageUtils.java +++ b/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/utils/OntapStorageUtils.java @@ -154,4 +154,46 @@ public static String getLunName(String volName, String lunName) { return OntapStorageConstants.VOLUME_PATH_PREFIX + volName + OntapStorageConstants.SLASH + lunName; } + /** + * Builds an ONTAP-safe name token from user-provided snapshot text. + */ + public static String getOntapCloneName(String cloudStackSnapshotName) { + if (cloudStackSnapshotName == null || cloudStackSnapshotName.trim().isEmpty()) { + throw new InvalidParameterValueException("Snapshot name cannot be null or blank"); + } + String normalized = cloudStackSnapshotName.replaceAll("[^a-zA-Z0-9_]", "_"); + if (normalized.isEmpty()) { + normalized = "snapshot"; + } + if (!Character.isLetter(normalized.charAt(0))) { + normalized = "s_" + normalized; + } + if (normalized.length() > OntapStorageConstants.MAX_SNAPSHOT_NAME_LENGTH) { + normalized = normalized.substring(0, OntapStorageConstants.MAX_SNAPSHOT_NAME_LENGTH); + } + return normalized; + } + + /** + * Builds an ONTAP-safe snapshot name that preserves the CloudStack UI snapshot name + * and appends a uniqueness suffix. + */ + public static String buildOntapSnapshotName(String cloudStackSnapshotName, String uniquenessSuffix) { + String normalizedBase = (cloudStackSnapshotName == null || cloudStackSnapshotName.trim().isEmpty()) + ? "snapshot" + : getOntapCloneName(cloudStackSnapshotName); + String suffix = (uniquenessSuffix == null || uniquenessSuffix.isEmpty()) + ? "" + : "_" + uniquenessSuffix.replaceAll("[^a-zA-Z0-9_]", "_"); + int maxLength = OntapStorageConstants.MAX_SNAPSHOT_NAME_LENGTH; + int maxBaseLength = maxLength - suffix.length(); + if (maxBaseLength <= 0) { + return normalizedBase.substring(0, maxLength); + } + if (normalizedBase.length() > maxBaseLength) { + normalizedBase = normalizedBase.substring(0, maxBaseLength); + } + return normalizedBase + suffix; + } + } diff --git a/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/vmsnapshot/OntapVMSnapshotStrategy.java b/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/vmsnapshot/OntapVMSnapshotStrategy.java index a71df4c2e349..8bbea431b726 100644 --- a/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/vmsnapshot/OntapVMSnapshotStrategy.java +++ b/plugins/storage/volume/ontap/src/main/java/org/apache/cloudstack/storage/vmsnapshot/OntapVMSnapshotStrategy.java @@ -20,8 +20,10 @@ import java.util.ArrayList; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.TimeUnit; import javax.inject.Inject; @@ -33,7 +35,6 @@ import org.apache.cloudstack.storage.datastore.db.StoragePoolDetailsDao; import org.apache.cloudstack.storage.datastore.db.StoragePoolVO; import org.apache.cloudstack.storage.feign.client.SnapshotFeignClient; -import org.apache.cloudstack.storage.feign.model.CliSnapshotRestoreRequest; import org.apache.cloudstack.storage.feign.model.FlexVolSnapshot; import org.apache.cloudstack.storage.feign.model.response.JobResponse; import org.apache.cloudstack.storage.feign.model.response.OntapResponse; @@ -72,25 +73,22 @@ import org.apache.cloudstack.storage.utils.OntapStorageConstants; /** - * VM Snapshot strategy for NetApp ONTAP managed storage using FlexVolume-level snapshots. + * VM Snapshot strategy for NetApp ONTAP managed storage using temporary consistency-group orchestration. * *

This strategy handles VM-level (instance) snapshots for VMs whose volumes - * reside on ONTAP managed primary storage. Instead of creating per-file clones - * (the old approach), it takes ONTAP FlexVolume-level snapshots via the - * ONTAP REST API ({@code POST /api/storage/volumes/{uuid}/snapshots}).

- * - *

Key Advantage:

- *

When multiple CloudStack disks (ROOT + DATA) reside on the same ONTAP - * FlexVolume, a single FlexVolume snapshot atomically captures all of them. - * This is both faster and more storage-efficient than per-file clones.

+ * reside on ONTAP managed primary storage. When VM volumes span multiple FlexVols, + * snapshot creation is coordinated through a temporary ONTAP consistency group (CG) + * and two-phase snapshot flow (start + commit). When all volumes share a single FlexVol, + * a direct FlexVol snapshot is used instead.

* *

Flow:

*
    *
  1. Group all VM volumes by their parent FlexVolume UUID
  2. *
  3. Freeze the VM via QEMU guest agent ({@code fsfreeze}) — if quiesce requested
  4. - *
  5. For each unique FlexVolume, create one ONTAP snapshot
  6. + *
  7. If VM spans multiple FlexVolumes: create temporary CG, start + commit CG snapshot (two-phase)
  8. + *
  9. If VM spans a single FlexVolume: create one FlexVol snapshot directly (no CG overhead)
  10. *
  11. Thaw the VM
  12. - *
  13. Record FlexVolume → snapshot UUID mappings in {@code vm_snapshot_details}
  14. + *
  15. Resolve FlexVolume → snapshot UUID mappings and persist in {@code vm_snapshot_details}
  16. *
* *

Metadata in vm_snapshot_details:

@@ -251,12 +249,14 @@ boolean allVolumesOnOntapManagedStorage(long vmId) { /** * Takes a VM-level snapshot by freezing the VM, creating ONTAP FlexVolume-level - * snapshots (one per unique FlexVolume), and then thawing the VM. + * snapshot(s), and then thawing the VM. * *

Volumes are grouped by their parent FlexVolume UUID (from storage pool details). - * For each unique FlexVolume, exactly one ONTAP snapshot is created via - * {@code POST /api/storage/volumes/{uuid}/snapshots}. This means if a VM has - * ROOT and DATA disks on the same FlexVolume, only one snapshot is created.

+ * When the VM spans more than one unique FlexVolume, a temporary ONTAP + * consistency group is used with two-phase snapshot semantics (start + commit) so + * all FlexVols are captured at the same point in time. When all VM volumes reside + * on a single FlexVolume, a direct per-FlexVol snapshot is taken instead — + * CG orchestration is unnecessary in that case.

* *

Memory Snapshots Not Supported: This strategy only supports disk-only * (crash-consistent) snapshots. Memory snapshots (snapshotmemory=true) are rejected @@ -286,7 +286,7 @@ public VMSnapshot takeVMSnapshot(VMSnapshot vmSnapshot) { FreezeThawVMAnswer thawAnswer = null; long startFreeze = 0; - // Track which FlexVolume snapshots were created (for rollback) + // Track which FlexVolume snapshots were created (for rollback and detail persistence) List createdSnapshots = new ArrayList<>(); boolean result = false; @@ -338,7 +338,8 @@ public VMSnapshot takeVMSnapshot(VMSnapshot vmSnapshot) { CreateVMSnapshotCommand ccmd = new CreateVMSnapshotCommand( userVm.getInstanceName(), userVm.getUuid(), target, volumeTOs, guestOS.getDisplayName()); - logger.info("takeVMSnapshot: Creating ONTAP FlexVolume VM Snapshot for VM [{}] with quiesce={}", userVm.getInstanceName(), quiesceVm); + logger.info("takeVMSnapshot: Creating ONTAP VM snapshot for VM [{}] with quiesce={}", + userVm.getInstanceName(), quiesceVm); // Prepare volume info list and calculate sizes for (VolumeObjectTO volumeObjectTO : volumeTOs) { @@ -375,56 +376,20 @@ public VMSnapshot takeVMSnapshot(VMSnapshot vmSnapshot) { userVm.getInstanceName(), quiesceVm, vmIsRunning); } - // ── Step 2: Create FlexVolume-level snapshots ── + // ── Step 2: Create FlexVolume-level snapshot(s) ── try { String snapshotNameBase = buildSnapshotName(vmSnapshot); - for (Map.Entry entry : flexVolGroups.entrySet()) { - String flexVolUuid = entry.getKey(); - FlexVolGroupInfo groupInfo = entry.getValue(); - long startSnapshot = System.nanoTime(); - - // Build storage strategy from pool details to get the feign client - StorageStrategy storageStrategy = OntapStorageUtils.getStrategyByStoragePoolDetails(groupInfo.poolDetails); - SnapshotFeignClient snapshotClient = storageStrategy.getSnapshotFeignClient(); - String authHeader = storageStrategy.getAuthHeader(); - - // Use the same snapshot name for all FlexVolumes in this VM snapshot - // (each FlexVolume gets its own independent snapshot with this name) - FlexVolSnapshot snapshotRequest = new FlexVolSnapshot(snapshotNameBase, - "CloudStack VM snapshot " + vmSnapshot.getName() + " for VM " + userVm.getInstanceName()); - - logger.info("takeVMSnapshot: Creating ONTAP FlexVolume snapshot [{}] on FlexVol UUID [{}] covering {} volume(s)", - snapshotNameBase, flexVolUuid, groupInfo.volumeIds.size()); - - JobResponse jobResponse = snapshotClient.createSnapshot(authHeader, flexVolUuid, snapshotRequest); - if (jobResponse == null || jobResponse.getJob() == null) { - throw new CloudRuntimeException("Failed to initiate FlexVolume snapshot on FlexVol UUID [" + flexVolUuid + "]"); - } - - // Poll for job completion - Boolean jobSucceeded = storageStrategy.jobPollForSuccess(jobResponse.getJob().getUuid(), 30, 2000); - if (!jobSucceeded) { - throw new CloudRuntimeException("FlexVolume snapshot job failed on FlexVol UUID [" + flexVolUuid + "]"); - } - - // Retrieve the created snapshot UUID by name - String snapshotUuid = resolveSnapshotUuid(snapshotClient, authHeader, flexVolUuid, snapshotNameBase); - - String protocol = groupInfo.poolDetails.get(OntapStorageConstants.PROTOCOL); - - // Create one detail per CloudStack volume in this FlexVol group (for single-file restore during revert) - for (Long volumeId : groupInfo.volumeIds) { - String volumePath = resolveVolumePathOnOntap(volumeId, protocol, groupInfo.poolDetails); - FlexVolSnapshotDetail detail = new FlexVolSnapshotDetail( - flexVolUuid, snapshotUuid, snapshotNameBase, volumePath, groupInfo.poolId, protocol); - createdSnapshots.add(detail); - } - - logger.info("takeVMSnapshot: ONTAP FlexVolume snapshot [{}] (uuid={}) on FlexVol [{}] completed in {} ms. Covers volumes: {}", - snapshotNameBase, snapshotUuid, flexVolUuid, - TimeUnit.MILLISECONDS.convert(System.nanoTime() - startSnapshot, TimeUnit.NANOSECONDS), - groupInfo.volumeIds); + // CG orchestration is only required when VM disks span multiple FlexVols. + // A single FlexVol already provides atomic capture for all volumes on that FlexVol. + if (flexVolGroups.size() > 1) { + logger.info("takeVMSnapshot: VM [{}] spans {} FlexVol(s); using temporary CG two-phase snapshot flow", + userVm.getInstanceName(), flexVolGroups.size()); + createVmSnapshotsViaTemporaryCg(vmSnapshot, userVm, flexVolGroups, snapshotNameBase, createdSnapshots); + } else { + logger.info("takeVMSnapshot: VM [{}] spans a single FlexVol; using direct FlexVol snapshot flow", + userVm.getInstanceName()); + createVmSnapshotsViaSingleFlexVol(vmSnapshot, userVm, flexVolGroups, snapshotNameBase, createdSnapshots); } } finally { // ── Step 3: Thaw the VM (only if it was frozen, always even on error) ── @@ -456,7 +421,7 @@ public VMSnapshot takeVMSnapshot(VMSnapshot vmSnapshot) { answer.setVolumeTOs(volumeTOs); processAnswer(vmSnapshotVO, userVm, answer, null); - logger.info("takeVMSnapshot: ONTAP FlexVolume VM Snapshot [{}] created successfully for VM [{}] ({} FlexVol snapshot(s))", + logger.info("takeVMSnapshot: ONTAP VM Snapshot [{}] created successfully for VM [{}] ({} detail row(s))", vmSnapshot.getName(), userVm.getInstanceName(), createdSnapshots.size()); long newChainSize = 0; @@ -668,16 +633,140 @@ Map groupVolumesByFlexVol(List volumeT } /** - * Builds a deterministic, ONTAP-safe snapshot name for a VM snapshot. - * Format: {@code vmsnap__} + * Creates VM snapshot artifacts via direct FlexVol snapshot API. + * + *

Used when all VM volumes map to a single FlexVol. In that case a CG is not + * needed because one FlexVol snapshot already captures every disk atomically.

*/ - String buildSnapshotName(VMSnapshot vmSnapshot) { - String name = "vmsnap_" + vmSnapshot.getId() + "_" + System.currentTimeMillis(); - // ONTAP snapshot names: max 256 chars, must start with letter, only alphanumeric and underscores - if (name.length() > OntapStorageConstants.MAX_SNAPSHOT_NAME_LENGTH) { - name = name.substring(0, OntapStorageConstants.MAX_SNAPSHOT_NAME_LENGTH); + void createVmSnapshotsViaSingleFlexVol(VMSnapshot vmSnapshot, UserVm userVm, + Map flexVolGroups, + String snapshotNameBase, + List createdSnapshots) { + for (Map.Entry entry : flexVolGroups.entrySet()) { + String flexVolUuid = entry.getKey(); + FlexVolGroupInfo groupInfo = entry.getValue(); + long startSnapshot = System.nanoTime(); + + StorageStrategy storageStrategy = resolveStorageStrategy(groupInfo.poolDetails); + SnapshotFeignClient snapshotClient = storageStrategy.getSnapshotFeignClient(); + String authHeader = storageStrategy.getAuthHeader(); + + FlexVolSnapshot snapshotRequest = new FlexVolSnapshot(snapshotNameBase, + "CloudStack VM snapshot " + vmSnapshot.getName() + " for VM " + userVm.getInstanceName()); + + logger.info("takeVMSnapshot: [FlexVol] Creating snapshot [{}] on FlexVol UUID [{}] covering {} volume(s)", + snapshotNameBase, flexVolUuid, groupInfo.volumeIds.size()); + + JobResponse jobResponse = snapshotClient.createSnapshot(authHeader, flexVolUuid, snapshotRequest); + if (jobResponse == null || jobResponse.getJob() == null) { + throw new CloudRuntimeException("Failed to initiate FlexVolume snapshot on FlexVol UUID [" + flexVolUuid + "]"); + } + + Boolean jobSucceeded = storageStrategy.jobPollForSuccess(jobResponse.getJob().getUuid(), 30, 2000); + if (!jobSucceeded) { + throw new CloudRuntimeException("FlexVolume snapshot job failed on FlexVol UUID [" + flexVolUuid + "]"); + } + + String snapshotUuid = resolveSnapshotUuid(snapshotClient, authHeader, flexVolUuid, snapshotNameBase); + String protocol = groupInfo.poolDetails.get(OntapStorageConstants.PROTOCOL); + + for (Long volumeId : groupInfo.volumeIds) { + String volumePath = resolveVolumePathOnOntap(volumeId, protocol, groupInfo.poolDetails); + createdSnapshots.add(new FlexVolSnapshotDetail( + flexVolUuid, snapshotUuid, snapshotNameBase, volumePath, groupInfo.poolId, protocol)); + } + + logger.info("takeVMSnapshot: [FlexVol] Snapshot [{}] (uuid={}) on FlexVol [{}] completed in {} ms. Covers volumes: {}", + snapshotNameBase, snapshotUuid, flexVolUuid, + TimeUnit.MILLISECONDS.convert(System.nanoTime() - startSnapshot, TimeUnit.NANOSECONDS), + groupInfo.volumeIds); } - return name; + } + + /** + * Creates VM snapshot artifacts via temporary consistency-group two-phase flow. + * + *

Used when VM volumes span multiple FlexVols and require a consistent + * point-in-time capture across all participating FlexVolumes.

+ */ + void createVmSnapshotsViaTemporaryCg(VMSnapshot vmSnapshot, UserVm userVm, + Map flexVolGroups, + String snapshotNameBase, + List createdSnapshots) { + String tempCgName = buildTemporaryConsistencyGroupName(vmSnapshot); + String tempCgUuid = null; + String cgSnapshotUuid = null; + long cgFlowStart = System.nanoTime(); + + // All volumes in a VM snapshot belong to ONTAP-managed pools and share the same ONTAP credentials. + // Use any one FlexVol group to obtain strategy/client objects for this operation. + FlexVolGroupInfo referenceGroup = flexVolGroups.values().iterator().next(); + StorageStrategy storageStrategy = resolveStorageStrategy(referenceGroup.poolDetails); + SnapshotFeignClient snapshotClient = storageStrategy.getSnapshotFeignClient(); + String authHeader = storageStrategy.getAuthHeader(); + + try { + logger.info("takeVMSnapshot: [CG:Create] Creating temporary consistency group [{}] for VM [{}] over {} FlexVol(s)", + tempCgName, userVm.getInstanceName(), flexVolGroups.size()); + tempCgUuid = createTemporaryConsistencyGroup(snapshotClient, storageStrategy, authHeader, tempCgName, flexVolGroups.keySet()); + + logger.info("takeVMSnapshot: [CG:Start] Starting phase-1 snapshot [{}] for temporary consistency group [{}]", + snapshotNameBase, tempCgUuid); + startConsistencyGroupSnapshot(snapshotClient, storageStrategy, authHeader, tempCgUuid, snapshotNameBase); + + cgSnapshotUuid = resolveConsistencyGroupSnapshotUuid(snapshotClient, authHeader, tempCgUuid, snapshotNameBase); + + logger.info("takeVMSnapshot: [CG:Commit] Committing phase-2 snapshot [{}] (uuid={}) for temporary consistency group [{}]", + snapshotNameBase, cgSnapshotUuid, tempCgUuid); + commitConsistencyGroupSnapshot(snapshotClient, storageStrategy, authHeader, tempCgUuid, cgSnapshotUuid); + + // Resolve per-FlexVol snapshot UUIDs and build one detail entry per CloudStack volume. + for (Map.Entry entry : flexVolGroups.entrySet()) { + String flexVolUuid = entry.getKey(); + FlexVolGroupInfo groupInfo = entry.getValue(); + String snapshotUuid = resolveSnapshotUuid(snapshotClient, authHeader, flexVolUuid, snapshotNameBase); + String protocol = groupInfo.poolDetails.get(OntapStorageConstants.PROTOCOL); + + for (Long volumeId : groupInfo.volumeIds) { + String volumePath = resolveVolumePathOnOntap(volumeId, protocol, groupInfo.poolDetails); + createdSnapshots.add(new FlexVolSnapshotDetail( + flexVolUuid, snapshotUuid, snapshotNameBase, volumePath, groupInfo.poolId, protocol)); + } + + logger.debug("takeVMSnapshot: [CG:Resolve] Snapshot [{}] resolved to FlexVol snapshot uuid [{}] for FlexVol [{}], volumes={}", + snapshotNameBase, snapshotUuid, flexVolUuid, groupInfo.volumeIds); + } + } finally { + // CG is only a transaction boundary; remove it after commit/failure and keep snapshots intact. + if (tempCgUuid != null) { + try { + logger.info("takeVMSnapshot: [CG:Cleanup] Deleting temporary consistency group [{}]", tempCgUuid); + deleteTemporaryConsistencyGroup(snapshotClient, storageStrategy, authHeader, tempCgUuid); + } catch (Exception cleanupEx) { + logger.warn("takeVMSnapshot: Failed to delete temporary consistency group [{}]: {}", + tempCgUuid, cleanupEx.getMessage()); + } + } + } + + logger.info("takeVMSnapshot: Temporary consistency-group two-phase flow completed for VM [{}] in {} ms. CG snapshot uuid={}, detail rows={}", + userVm.getInstanceName(), + TimeUnit.MILLISECONDS.convert(System.nanoTime() - cgFlowStart, TimeUnit.NANOSECONDS), + cgSnapshotUuid, createdSnapshots.size()); + } + + /** + * Builds an ONTAP-safe snapshot name from the CloudStack VM snapshot UI name. + */ + String buildSnapshotName(VMSnapshot vmSnapshot) { + return OntapStorageUtils.buildOntapSnapshotName(vmSnapshot.getName(), "vm" + vmSnapshot.getId()); + } + + /** + * Wrapper for static utility to simplify unit testing and keep call sites explicit. + */ + StorageStrategy resolveStorageStrategy(Map poolDetails) { + return OntapStorageUtils.getStrategyByStoragePoolDetails(poolDetails); } /** @@ -695,6 +784,116 @@ String resolveSnapshotUuid(SnapshotFeignClient client, String authHeader, return response.getRecords().get(0).getUuid(); } + /** + * Builds a deterministic temporary CG name for the VM snapshot transaction. + */ + String buildTemporaryConsistencyGroupName(VMSnapshot vmSnapshot) { + return OntapStorageUtils.buildOntapSnapshotName( + OntapStorageConstants.ONTAP_TEMP_CG_PREFIX + vmSnapshot.getId(), + "cg" + vmSnapshot.getId()); + } + + /** + * Creates a temporary consistency group for the involved FlexVol UUIDs and returns its UUID. + */ + String createTemporaryConsistencyGroup(SnapshotFeignClient client, StorageStrategy storageStrategy, + String authHeader, String cgName, Set flexVolUuids) { + List> volumeRefs = new ArrayList<>(); + for (String flexVolUuid : flexVolUuids) { + Map volumeRef = new LinkedHashMap<>(); + volumeRef.put("uuid", flexVolUuid); + volumeRefs.add(volumeRef); + } + + Map payload = new LinkedHashMap<>(); + payload.put("name", cgName); + payload.put("volumes", volumeRefs); + + JobResponse response = client.createConsistencyGroup(authHeader, payload); + storageStrategy.pollJobIfPresent(response, "create temporary consistency group " + cgName); + + String cgUuid = resolveConsistencyGroupUuidByName(client, authHeader, cgName); + if (cgUuid == null || cgUuid.isEmpty()) { + throw new CloudRuntimeException("Unable to resolve temporary consistency group UUID for [" + cgName + "]"); + } + return cgUuid; + } + + /** + * Starts phase-1 of the two-phase CG snapshot. + */ + void startConsistencyGroupSnapshot(SnapshotFeignClient client, StorageStrategy storageStrategy, + String authHeader, String cgUuid, String snapshotName) { + Map payload = new LinkedHashMap<>(); + payload.put("name", snapshotName); + payload.put("action", "start"); + JobResponse response = client.createConsistencyGroupSnapshot(authHeader, cgUuid, payload); + storageStrategy.pollJobIfPresent(response, "start CG snapshot " + snapshotName + " for " + cgUuid); + } + + /** + * Commits phase-2 of the started CG snapshot. + */ + void commitConsistencyGroupSnapshot(SnapshotFeignClient client, StorageStrategy storageStrategy, + String authHeader, String cgUuid, String snapshotUuid) { + Map payload = new LinkedHashMap<>(); + payload.put("action", "commit"); + JobResponse response = client.commitConsistencyGroupSnapshot(authHeader, cgUuid, snapshotUuid, payload); + storageStrategy.pollJobIfPresent(response, "commit CG snapshot " + snapshotUuid + " for " + cgUuid); + } + + /** + * Deletes the temporary consistency group used as a transaction boundary. + */ + void deleteTemporaryConsistencyGroup(SnapshotFeignClient client, StorageStrategy storageStrategy, + String authHeader, String cgUuid) { + JobResponse response = client.deleteConsistencyGroup(authHeader, cgUuid); + storageStrategy.pollJobIfPresent(response, "delete temporary consistency group " + cgUuid); + } + + /** + * Resolves consistency group UUID by name. + */ + String resolveConsistencyGroupUuidByName(SnapshotFeignClient client, String authHeader, String cgName) { + Map query = new HashMap<>(); + query.put("name", cgName); + query.put("fields", "uuid,name"); + OntapResponse> response = client.getConsistencyGroups(authHeader, query); + if (response == null || response.getRecords() == null || response.getRecords().isEmpty()) { + return null; + } + return getStringField(response.getRecords().get(0), "uuid"); + } + + /** + * Resolves consistency group snapshot UUID by name. + */ + String resolveConsistencyGroupSnapshotUuid(SnapshotFeignClient client, String authHeader, + String cgUuid, String snapshotName) { + Map query = new HashMap<>(); + query.put("name", snapshotName); + query.put("fields", "uuid,name"); + OntapResponse> response = client.getConsistencyGroupSnapshots(authHeader, cgUuid, query); + if (response == null || response.getRecords() == null || response.getRecords().isEmpty()) { + throw new CloudRuntimeException("Unable to resolve consistency group snapshot UUID for snapshot [" + + snapshotName + "] in CG [" + cgUuid + "]"); + } + String snapshotUuid = getStringField(response.getRecords().get(0), "uuid"); + if (snapshotUuid == null || snapshotUuid.isEmpty()) { + throw new CloudRuntimeException("Invalid consistency group snapshot UUID for snapshot [" + + snapshotName + "] in CG [" + cgUuid + "]"); + } + return snapshotUuid; + } + + private String getStringField(Map record, String key) { + if (record == null) { + return null; + } + Object value = record.get(key); + return value != null ? value.toString() : null; + } + /** * Resolves the ONTAP-side path of a CloudStack volume within its FlexVolume. * @@ -735,7 +934,7 @@ String resolveVolumePathOnOntap(Long volumeId, String protocol, Map poolDetails = storagePoolDetailsDao.listDetailsKeyPairs(detail.poolId); - StorageStrategy storageStrategy = OntapStorageUtils.getStrategyByStoragePoolDetails(poolDetails); + StorageStrategy storageStrategy = resolveStorageStrategy(poolDetails); SnapshotFeignClient client = storageStrategy.getSnapshotFeignClient(); String authHeader = storageStrategy.getAuthHeader(); @@ -769,20 +968,17 @@ void deleteFlexVolSnapshots(List flexVolDetails) { // Only delete the ONTAP snapshot once per FlexVol+Snapshot pair if (!deletedSnapshots.containsKey(dedupeKey)) { Map poolDetails = storagePoolDetailsDao.listDetailsKeyPairs(detail.poolId); - StorageStrategy storageStrategy = OntapStorageUtils.getStrategyByStoragePoolDetails(poolDetails); - SnapshotFeignClient client = storageStrategy.getSnapshotFeignClient(); - String authHeader = storageStrategy.getAuthHeader(); + StorageStrategy storageStrategy = resolveStorageStrategy(poolDetails); logger.info("deleteFlexVolSnapshots: Deleting ONTAP FlexVol snapshot [{}] (uuid={}) on FlexVol [{}]", detail.snapshotName, detail.snapshotUuid, detail.flexVolUuid); - JobResponse jobResponse = client.deleteSnapshot(authHeader, detail.flexVolUuid, detail.snapshotUuid); - if (jobResponse != null && jobResponse.getJob() != null) { - storageStrategy.jobPollForSuccess(jobResponse.getJob().getUuid(), 30, 2000); - } + storageStrategy.deleteFlexVolSnapshotForCloudStackVolume( + detail.flexVolUuid, detail.snapshotUuid, detail.snapshotName); deletedSnapshots.put(dedupeKey, Boolean.TRUE); - logger.info("deleteFlexVolSnapshots: Deleted ONTAP FlexVol snapshot [{}] on FlexVol [{}]", detail.snapshotName, detail.flexVolUuid); + logger.info("deleteFlexVolSnapshots: Deleted ONTAP FlexVol snapshot [{}] on FlexVol [{}]", + detail.snapshotName, detail.flexVolUuid); } // Always remove the DB detail row @@ -818,41 +1014,24 @@ void revertFlexVolSnapshots(List flexVolDetails) { } Map poolDetails = storagePoolDetailsDao.listDetailsKeyPairs(detail.poolId); - StorageStrategy storageStrategy = OntapStorageUtils.getStrategyByStoragePoolDetails(poolDetails); - SnapshotFeignClient snapshotClient = storageStrategy.getSnapshotFeignClient(); - String authHeader = storageStrategy.getAuthHeader(); + StorageStrategy storageStrategy = resolveStorageStrategy(poolDetails); - // Get SVM name and FlexVolume name from pool details - String svmName = poolDetails.get(OntapStorageConstants.SVM_NAME); String flexVolName = poolDetails.get(OntapStorageConstants.VOLUME_NAME); - - if (svmName == null || svmName.isEmpty()) { - throw new CloudRuntimeException("SVM name not found in pool details for pool [" + detail.poolId + "]"); - } if (flexVolName == null || flexVolName.isEmpty()) { throw new CloudRuntimeException("FlexVolume name not found in pool details for pool [" + detail.poolId + "]"); } - // The path must start with "/" for the ONTAP CLI API String ontapFilePath = detail.volumePath.startsWith("/") ? detail.volumePath : "/" + detail.volumePath; logger.info("revertFlexVolSnapshots: Restoring volume [{}] from FlexVol snapshot [{}] on FlexVol [{}] (protocol={})", ontapFilePath, detail.snapshotName, flexVolName, detail.protocol); - // Use CLI-based restore API: POST /api/private/cli/volume/snapshot/restore-file - CliSnapshotRestoreRequest restoreRequest = new CliSnapshotRestoreRequest( - svmName, flexVolName, detail.snapshotName, ontapFilePath); - - JobResponse jobResponse = snapshotClient.restoreFileFromSnapshotCli(authHeader, restoreRequest); + JobResponse jobResponse = storageStrategy.revertSnapshotForCloudStackVolume( + detail.snapshotName, detail.flexVolUuid, detail.snapshotUuid, + detail.volumePath, null, flexVolName); - if (jobResponse != null && jobResponse.getJob() != null) { - Boolean success = storageStrategy.jobPollForSuccess(jobResponse.getJob().getUuid(), 60, 2000); - if (!success) { - throw new CloudRuntimeException("Snapshot file restore failed for volume path [" + - ontapFilePath + "] from snapshot [" + detail.snapshotName + - "] on FlexVol [" + flexVolName + "]"); - } - } + storageStrategy.executeCliSfsrRestore(jobResponse, + "VM snapshot file restore for path [" + ontapFilePath + "] from snapshot [" + detail.snapshotName + "]"); logger.info("revertFlexVolSnapshots: Successfully restored volume [{}] from snapshot [{}] on FlexVol [{}]", ontapFilePath, detail.snapshotName, flexVolName); diff --git a/plugins/storage/volume/ontap/src/test/java/org/apache/cloudstack/storage/driver/OntapPrimaryDatastoreDriverTest.java b/plugins/storage/volume/ontap/src/test/java/org/apache/cloudstack/storage/driver/OntapPrimaryDatastoreDriverTest.java index b535217fd235..e176bd3993b3 100644 --- a/plugins/storage/volume/ontap/src/test/java/org/apache/cloudstack/storage/driver/OntapPrimaryDatastoreDriverTest.java +++ b/plugins/storage/volume/ontap/src/test/java/org/apache/cloudstack/storage/driver/OntapPrimaryDatastoreDriverTest.java @@ -134,6 +134,7 @@ void testGetCapabilities() { // so StorageSystemSnapshotStrategy handles snapshot backup to secondary storage assertEquals(Boolean.TRUE.toString(), capabilities.get("STORAGE_SYSTEM_SNAPSHOT")); assertEquals(Boolean.TRUE.toString(), capabilities.get("CAN_CREATE_VOLUME_FROM_SNAPSHOT")); + assertEquals(Boolean.TRUE.toString(), capabilities.get("CAN_REVERT_VOLUME_TO_SNAPSHOT")); } @Test diff --git a/plugins/storage/volume/ontap/src/test/java/org/apache/cloudstack/storage/service/StorageStrategyTest.java b/plugins/storage/volume/ontap/src/test/java/org/apache/cloudstack/storage/service/StorageStrategyTest.java index 86ef1d7c79b6..1639853a24f1 100644 --- a/plugins/storage/volume/ontap/src/test/java/org/apache/cloudstack/storage/service/StorageStrategyTest.java +++ b/plugins/storage/volume/ontap/src/test/java/org/apache/cloudstack/storage/service/StorageStrategyTest.java @@ -24,6 +24,7 @@ import org.apache.cloudstack.storage.feign.client.JobFeignClient; import org.apache.cloudstack.storage.feign.client.NetworkFeignClient; import org.apache.cloudstack.storage.feign.client.SANFeignClient; +import org.apache.cloudstack.storage.feign.client.SnapshotFeignClient; import org.apache.cloudstack.storage.feign.client.SvmFeignClient; import org.apache.cloudstack.storage.feign.client.VolumeFeignClient; import org.apache.cloudstack.storage.feign.model.Aggregate; @@ -88,6 +89,9 @@ public class StorageStrategyTest { @Mock private SANFeignClient sanFeignClient; + @Mock + private SnapshotFeignClient snapshotFeignClient; + private TestableStorageStrategy storageStrategy; // Concrete implementation for testing abstract class @@ -98,7 +102,8 @@ public TestableStorageStrategy(OntapStorage ontapStorage, SvmFeignClient svmFeignClient, JobFeignClient jobFeignClient, NetworkFeignClient networkFeignClient, - SANFeignClient sanFeignClient) { + SANFeignClient sanFeignClient, + SnapshotFeignClient snapshotFeignClient) { super(ontapStorage); // Use reflection to replace the private Feign client fields with mocked ones injectMockedClient("aggregateFeignClient", aggregateFeignClient); @@ -107,6 +112,7 @@ public TestableStorageStrategy(OntapStorage ontapStorage, injectMockedClient("jobFeignClient", jobFeignClient); injectMockedClient("networkFeignClient", networkFeignClient); injectMockedClient("sanFeignClient", sanFeignClient); + injectMockedClient("snapshotFeignClient", snapshotFeignClient); } private void injectMockedClient(String fieldName, Object mockedClient) { @@ -192,7 +198,7 @@ void setUp() { // For testing, we'll need to mock the FeignClientFactory behavior storageStrategy = new TestableStorageStrategy(ontapStorage, aggregateFeignClient, volumeFeignClient, svmFeignClient, - jobFeignClient, networkFeignClient, sanFeignClient); + jobFeignClient, networkFeignClient, sanFeignClient, snapshotFeignClient); } // ========== connect() Tests ========== @@ -291,7 +297,7 @@ public void testConnect_iscsiNotEnabled() { "svm1", 5000000000L, ProtocolType.ISCSI); storageStrategy = new TestableStorageStrategy(iscsiStorage, aggregateFeignClient, volumeFeignClient, svmFeignClient, - jobFeignClient, networkFeignClient, sanFeignClient); + jobFeignClient, networkFeignClient, sanFeignClient, snapshotFeignClient); Svm svm = new Svm(); svm.setName("svm1"); @@ -608,7 +614,7 @@ public void testGetStoragePath_iscsi() { "svm1", null, ProtocolType.ISCSI); storageStrategy = new TestableStorageStrategy(iscsiStorage, aggregateFeignClient, volumeFeignClient, svmFeignClient, - jobFeignClient, networkFeignClient, sanFeignClient); + jobFeignClient, networkFeignClient, sanFeignClient, snapshotFeignClient); IscsiService.IscsiServiceTarget target = new IscsiService.IscsiServiceTarget(); target.setName("iqn.1992-08.com.netapp:sn.123456:vs.1"); @@ -638,7 +644,7 @@ public void testGetStoragePath_iscsi_noService() { "svm1", null, ProtocolType.ISCSI); storageStrategy = new TestableStorageStrategy(iscsiStorage, aggregateFeignClient, volumeFeignClient, svmFeignClient, - jobFeignClient, networkFeignClient, sanFeignClient); + jobFeignClient, networkFeignClient, sanFeignClient, snapshotFeignClient); OntapResponse emptyResponse = new OntapResponse<>(); emptyResponse.setRecords(new ArrayList<>()); @@ -659,7 +665,7 @@ public void testGetStoragePath_iscsi_noTargetIqn() { "svm1", null, ProtocolType.ISCSI); storageStrategy = new TestableStorageStrategy(iscsiStorage, aggregateFeignClient, volumeFeignClient, svmFeignClient, - jobFeignClient, networkFeignClient, sanFeignClient); + jobFeignClient, networkFeignClient, sanFeignClient, snapshotFeignClient); IscsiService iscsiService = new IscsiService(); iscsiService.setTarget(null); @@ -709,7 +715,7 @@ public void testGetNetworkInterface_iscsi() { "svm1", null, ProtocolType.ISCSI); storageStrategy = new TestableStorageStrategy(iscsiStorage, aggregateFeignClient, volumeFeignClient, svmFeignClient, - jobFeignClient, networkFeignClient, sanFeignClient); + jobFeignClient, networkFeignClient, sanFeignClient, snapshotFeignClient); IpInterface.IpInfo ipInfo = new IpInterface.IpInfo(); ipInfo.setAddress("192.168.1.51"); @@ -823,4 +829,66 @@ private void setupSuccessfulJobCreation() { when(volumeFeignClient.getVolume(anyString(), anyMap())) .thenReturn(volumeResponse); } + + // ========== pollJobIfPresent / executeCliSfsrRestore Tests ========== + + @Test + void testPollJobIfPresent_NoJob_DoesNotPoll() { + storageStrategy.pollJobIfPresent(null, "test operation"); + storageStrategy.pollJobIfPresent(new JobResponse(), "test operation"); + verify(jobFeignClient, times(0)).getJobByUUID(anyString(), anyString()); + } + + @Test + void testPollJobIfPresent_WithJob_PollsUntilSuccess() { + Job job = new Job(); + job.setUuid("sfsr-job-1"); + JobResponse response = new JobResponse(); + response.setJob(job); + + Job completedJob = new Job(); + completedJob.setUuid("sfsr-job-1"); + completedJob.setState(OntapStorageConstants.JOB_SUCCESS); + when(jobFeignClient.getJobByUUID(anyString(), eq("sfsr-job-1"))).thenReturn(completedJob); + + storageStrategy.executeCliSfsrRestore(response, "CLI SFSR restore"); + + verify(jobFeignClient, atLeastOnce()).getJobByUUID(anyString(), eq("sfsr-job-1")); + } + + @Test + void testPollJobIfPresent_JobFailure_ThrowsCloudRuntimeException() { + Job job = new Job(); + job.setUuid("sfsr-job-fail"); + JobResponse response = new JobResponse(); + response.setJob(job); + + Job failedJob = new Job(); + failedJob.setUuid("sfsr-job-fail"); + failedJob.setState(OntapStorageConstants.JOB_FAILURE); + failedJob.setMessage("restore failed"); + when(jobFeignClient.getJobByUUID(anyString(), eq("sfsr-job-fail"))).thenReturn(failedJob); + + assertThrows(CloudRuntimeException.class, + () -> storageStrategy.executeCliSfsrRestore(response, "CLI SFSR restore")); + } + + @Test + void testDeleteFlexVolSnapshotForCloudStackVolume_PollsJobAndSucceeds() { + Job job = new Job(); + job.setUuid("delete-job-1"); + JobResponse response = new JobResponse(); + response.setJob(job); + when(snapshotFeignClient.deleteSnapshot(anyString(), eq("fv-uuid-1"), eq("snap-uuid-1"))) + .thenReturn(response); + + Job completedJob = new Job(); + completedJob.setUuid("delete-job-1"); + completedJob.setState(OntapStorageConstants.JOB_SUCCESS); + when(jobFeignClient.getJobByUUID(anyString(), eq("delete-job-1"))).thenReturn(completedJob); + + storageStrategy.deleteFlexVolSnapshotForCloudStackVolume("fv-uuid-1", "snap-uuid-1", "snap-name-1"); + + verify(snapshotFeignClient).deleteSnapshot(anyString(), eq("fv-uuid-1"), eq("snap-uuid-1")); + } } diff --git a/plugins/storage/volume/ontap/src/test/java/org/apache/cloudstack/storage/vmsnapshot/OntapVMSnapshotStrategyTest.java b/plugins/storage/volume/ontap/src/test/java/org/apache/cloudstack/storage/vmsnapshot/OntapVMSnapshotStrategyTest.java index b069ab7246a0..9d7989d65e7a 100644 --- a/plugins/storage/volume/ontap/src/test/java/org/apache/cloudstack/storage/vmsnapshot/OntapVMSnapshotStrategyTest.java +++ b/plugins/storage/volume/ontap/src/test/java/org/apache/cloudstack/storage/vmsnapshot/OntapVMSnapshotStrategyTest.java @@ -21,12 +21,17 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.atLeastOnce; import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.lenient; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -44,6 +49,12 @@ import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao; import org.apache.cloudstack.storage.datastore.db.StoragePoolDetailsDao; import org.apache.cloudstack.storage.datastore.db.StoragePoolVO; +import org.apache.cloudstack.storage.feign.client.SnapshotFeignClient; +import org.apache.cloudstack.storage.feign.model.FlexVolSnapshot; +import org.apache.cloudstack.storage.feign.model.Job; +import org.apache.cloudstack.storage.feign.model.response.JobResponse; +import org.apache.cloudstack.storage.feign.model.response.OntapResponse; +import org.apache.cloudstack.storage.service.StorageStrategy; import org.apache.cloudstack.storage.to.VolumeObjectTO; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -127,6 +138,10 @@ class OntapVMSnapshotStrategyTest { private VolumeDataFactory volumeDataFactory; @Mock private VolumeDetailsDao volumeDetailsDao; + @Mock + private StorageStrategy storageStrategy; + @Mock + private SnapshotFeignClient snapshotFeignClient; @Spy @InjectMocks @@ -226,14 +241,18 @@ void testCanHandle_AllocatedDiskType_VmxenHypervisor_ReturnsCantHandle() { } @Test - void testCanHandle_AllocatedDiskType_VmNotRunning_ReturnsCantHandle() { + void testCanHandle_AllocatedDiskType_VmStopped_ReturnsHighest() { UserVmVO userVm = createMockUserVm(Hypervisor.HypervisorType.KVM, VirtualMachine.State.Stopped); when(userVmDao.findById(VM_ID)).thenReturn(userVm); VMSnapshotVO vmSnapshot = createMockVmSnapshot(VMSnapshot.State.Allocated, VMSnapshot.Type.Disk); + VolumeVO vol = createMockVolume(VOLUME_ID_1, POOL_ID_1); + when(volumeDao.findByInstance(VM_ID)).thenReturn(Collections.singletonList(vol)); + StoragePoolVO pool = createOntapManagedPool(POOL_ID_1); + when(storagePool.findById(POOL_ID_1)).thenReturn(pool); StrategyPriority result = strategy.canHandle(vmSnapshot); - assertEquals(StrategyPriority.CANT_HANDLE, result); + assertEquals(StrategyPriority.HIGHEST, result); } @Test @@ -593,10 +612,11 @@ void testFlexVolSnapshotDetail_Parse5Parts_ThrowsException() { void testBuildSnapshotName_Format() { VMSnapshotVO vmSnapshot = mock(VMSnapshotVO.class); when(vmSnapshot.getId()).thenReturn(SNAPSHOT_ID); + when(vmSnapshot.getName()).thenReturn("UI VM Snapshot"); String name = strategy.buildSnapshotName(vmSnapshot); - assertEquals(true, name.startsWith("vmsnap_200_")); + assertEquals(true, name.startsWith("UI_VM_Snapshot_vm200")); assertEquals(true, name.length() <= OntapStorageConstants.MAX_SNAPSHOT_NAME_LENGTH); } @@ -732,6 +752,86 @@ void testTakeVMSnapshot_OperationTimeout_ThrowsCloudRuntimeException() throws Ex assertEquals(true, ex.getMessage().contains("timed out")); } + @Test + void testTakeVMSnapshot_SingleFlexVolSuccess_UsesDirectSnapshotNotCg() throws Exception { + VMSnapshotVO vmSnapshot = createTakeSnapshotVmSnapshot(); + setupTakeSnapshotCommon(vmSnapshot); + setupSingleVolumeForTakeSnapshot(); + + String snapshotName = strategy.buildSnapshotName(vmSnapshot); + setupSingleFlexVolFlowMocks(snapshotName); + + FreezeThawVMAnswer freezeAnswer = mock(FreezeThawVMAnswer.class); + when(freezeAnswer.getResult()).thenReturn(true); + FreezeThawVMAnswer thawAnswer = mock(FreezeThawVMAnswer.class); + when(thawAnswer.getResult()).thenReturn(true); + when(agentMgr.send(eq(HOST_ID), any(FreezeThawVMCommand.class))) + .thenReturn(freezeAnswer) + .thenReturn(thawAnswer); + + strategy.takeVMSnapshot(vmSnapshot); + + verify(snapshotFeignClient, times(1)).createSnapshot(any(), eq("flexvol-uuid-1"), any()); + verify(snapshotFeignClient, never()).createConsistencyGroup(any(), any()); + verify(snapshotFeignClient, never()).createConsistencyGroupSnapshot(any(), any(), any()); + verify(snapshotFeignClient, never()).commitConsistencyGroupSnapshot(any(), any(), any(), any()); + verify(snapshotFeignClient, never()).deleteConsistencyGroup(any(), any()); + verify(vmSnapshotDetailsDao, atLeastOnce()).persist(any(VMSnapshotDetailsVO.class)); + } + + @Test + void testTakeVMSnapshot_TemporaryCgTwoPhaseSuccess_PersistsDetailsAndCleansUpCg() throws Exception { + VMSnapshotVO vmSnapshot = createTakeSnapshotVmSnapshot(); + setupTakeSnapshotCommon(vmSnapshot); + setupMultiFlexVolForTakeSnapshot(); + + String snapshotName = strategy.buildSnapshotName(vmSnapshot); + setupTemporaryCgFlowMocks(snapshotName); + + FreezeThawVMAnswer freezeAnswer = mock(FreezeThawVMAnswer.class); + when(freezeAnswer.getResult()).thenReturn(true); + FreezeThawVMAnswer thawAnswer = mock(FreezeThawVMAnswer.class); + when(thawAnswer.getResult()).thenReturn(true); + when(agentMgr.send(eq(HOST_ID), any(FreezeThawVMCommand.class))) + .thenReturn(freezeAnswer) + .thenReturn(thawAnswer); + + strategy.takeVMSnapshot(vmSnapshot); + + verify(snapshotFeignClient, times(1)).createConsistencyGroup(any(), any()); + verify(snapshotFeignClient, times(1)).createConsistencyGroupSnapshot(any(), eq("cg-uuid-1"), any()); + verify(snapshotFeignClient, times(1)).commitConsistencyGroupSnapshot(any(), eq("cg-uuid-1"), eq("cg-snap-uuid-1"), any()); + verify(snapshotFeignClient, times(1)).deleteConsistencyGroup(any(), eq("cg-uuid-1")); + verify(vmSnapshotDetailsDao, atLeastOnce()).persist(any(VMSnapshotDetailsVO.class)); + } + + @Test + void testTakeVMSnapshot_TemporaryCgStartFails_TransitionsToOperationFailed() throws Exception { + VMSnapshotVO vmSnapshot = createTakeSnapshotVmSnapshot(); + setupTakeSnapshotCommon(vmSnapshot); + setupMultiFlexVolForTakeSnapshot(); + + String snapshotName = strategy.buildSnapshotName(vmSnapshot); + setupTemporaryCgFlowMocks(snapshotName); + when(snapshotFeignClient.createConsistencyGroupSnapshot(any(), eq("cg-uuid-1"), any())) + .thenThrow(new CloudRuntimeException("start phase failed")); + + FreezeThawVMAnswer freezeAnswer = mock(FreezeThawVMAnswer.class); + when(freezeAnswer.getResult()).thenReturn(true); + FreezeThawVMAnswer thawAnswer = mock(FreezeThawVMAnswer.class); + when(thawAnswer.getResult()).thenReturn(true); + when(agentMgr.send(eq(HOST_ID), any(FreezeThawVMCommand.class))) + .thenReturn(freezeAnswer) + .thenReturn(thawAnswer); + when(vmSnapshotDetailsDao.listDetails(SNAPSHOT_ID)).thenReturn(Collections.emptyList()); + doReturn(true).when(vmSnapshotHelper).vmSnapshotStateTransitTo(any(), eq(VMSnapshot.Event.OperationFailed)); + + assertThrows(CloudRuntimeException.class, () -> strategy.takeVMSnapshot(vmSnapshot)); + + verify(snapshotFeignClient, times(1)).deleteConsistencyGroup(any(), eq("cg-uuid-1")); + verify(vmSnapshotHelper, atLeastOnce()).vmSnapshotStateTransitTo(any(), eq(VMSnapshot.Event.OperationFailed)); + } + // ══════════════════════════════════════════════════════════════════════════ // Tests: Quiesce Behavior // ══════════════════════════════════════════════════════════════════════════ @@ -746,20 +846,9 @@ void testTakeVMSnapshot_QuiesceFalse_SkipsFreezeThaw() throws Exception { setupTakeSnapshotCommon(vmSnapshot); setupSingleVolumeForTakeSnapshot(); + setupSingleFlexVolFlowMocks(strategy.buildSnapshotName(vmSnapshot)); - // The FlexVolume snapshot flow will try to call Utility.getStrategyByStoragePoolDetails - // which is a static method that makes real connections. We expect this to fail in unit tests. - // The important thing is that freeze/thaw was NOT called before the failure. - when(vmSnapshotDetailsDao.listDetails(SNAPSHOT_ID)).thenReturn(Collections.emptyList()); - doReturn(true).when(vmSnapshotHelper).vmSnapshotStateTransitTo(any(), eq(VMSnapshot.Event.OperationFailed)); - - // Since Utility.getStrategyByStoragePoolDetails is static and creates real Feign clients, - // this will fail. We just verify that freeze was never called. - try { - strategy.takeVMSnapshot(vmSnapshot); - } catch (Exception e) { - // Expected — static utility can't be mocked in unit test - } + strategy.takeVMSnapshot(vmSnapshot); // No freeze/thaw commands should be sent when quiesce is false verify(agentMgr, never()).send(eq(HOST_ID), any(FreezeThawVMCommand.class)); @@ -790,16 +879,9 @@ void testTakeVMSnapshot_WithParentSnapshot_SetsParentId() throws Exception { when(agentMgr.send(eq(HOST_ID), any(FreezeThawVMCommand.class))) .thenReturn(freezeAnswer) .thenReturn(thawAnswer); + setupSingleFlexVolFlowMocks(strategy.buildSnapshotName(vmSnapshot)); - when(vmSnapshotDetailsDao.listDetails(SNAPSHOT_ID)).thenReturn(Collections.emptyList()); - doReturn(true).when(vmSnapshotHelper).vmSnapshotStateTransitTo(any(), eq(VMSnapshot.Event.OperationFailed)); - - // FlexVol snapshot flow will fail on static method, but parent should already be set - try { - strategy.takeVMSnapshot(vmSnapshot); - } catch (Exception e) { - // Expected - } + strategy.takeVMSnapshot(vmSnapshot); // Verify parent was set on the VM snapshot before the FlexVol snapshot attempt verify(vmSnapshot).setParent(199L); @@ -820,15 +902,9 @@ void testTakeVMSnapshot_WithNoParentSnapshot_SetsParentNull() throws Exception { when(agentMgr.send(eq(HOST_ID), any(FreezeThawVMCommand.class))) .thenReturn(freezeAnswer) .thenReturn(thawAnswer); + setupSingleFlexVolFlowMocks(strategy.buildSnapshotName(vmSnapshot)); - when(vmSnapshotDetailsDao.listDetails(SNAPSHOT_ID)).thenReturn(Collections.emptyList()); - doReturn(true).when(vmSnapshotHelper).vmSnapshotStateTransitTo(any(), eq(VMSnapshot.Event.OperationFailed)); - - try { - strategy.takeVMSnapshot(vmSnapshot); - } catch (Exception e) { - // Expected - } + strategy.takeVMSnapshot(vmSnapshot); verify(vmSnapshot).setParent(null); } @@ -866,6 +942,9 @@ private UserVmVO setupTakeSnapshotCommon(VMSnapshotVO vmSnapshot) throws Excepti when(vmSnapshotDao.findCurrentSnapshotByVmId(VM_ID)).thenReturn(null); doReturn(true).when(vmSnapshotHelper).vmSnapshotStateTransitTo(vmSnapshot, VMSnapshot.Event.CreateRequested); + doNothing().when(strategy).processAnswer(any(), any(), any(), any()); + doNothing().when(strategy).publishUsageEvent(any(), any(), any(), any()); + doNothing().when(strategy).publishUsageEvent(any(), any(), any(), anyLong(), anyLong()); return userVm; } @@ -880,6 +959,7 @@ private void setupSingleVolumeForTakeSnapshot() { VolumeVO volumeVO = mock(VolumeVO.class); when(volumeVO.getId()).thenReturn(VOLUME_ID_1); when(volumeVO.getPoolId()).thenReturn(POOL_ID_1); + when(volumeVO.getPath()).thenReturn("volume-301.qcow2"); when(volumeVO.getVmSnapshotChainSize()).thenReturn(null); when(volumeDao.findById(VOLUME_ID_1)).thenReturn(volumeVO); @@ -899,4 +979,127 @@ private void setupSingleVolumeForTakeSnapshot() { when(volumeInfo.getName()).thenReturn("vol-1"); when(volumeDataFactory.getVolume(VOLUME_ID_1)).thenReturn(volumeInfo); } + + private void setupMultiFlexVolForTakeSnapshot() { + VolumeObjectTO volumeTO1 = mock(VolumeObjectTO.class); + when(volumeTO1.getId()).thenReturn(VOLUME_ID_1); + when(volumeTO1.getSize()).thenReturn(10737418240L); + VolumeObjectTO volumeTO2 = mock(VolumeObjectTO.class); + when(volumeTO2.getId()).thenReturn(VOLUME_ID_2); + when(volumeTO2.getSize()).thenReturn(10737418240L); + List volumeTOs = Arrays.asList(volumeTO1, volumeTO2); + when(vmSnapshotHelper.getVolumeTOList(VM_ID)).thenReturn(volumeTOs); + + VolumeVO volumeVO1 = mock(VolumeVO.class); + when(volumeVO1.getId()).thenReturn(VOLUME_ID_1); + when(volumeVO1.getPoolId()).thenReturn(POOL_ID_1); + when(volumeVO1.getPath()).thenReturn("volume-301.qcow2"); + when(volumeVO1.getVmSnapshotChainSize()).thenReturn(null); + when(volumeDao.findById(VOLUME_ID_1)).thenReturn(volumeVO1); + + VolumeVO volumeVO2 = mock(VolumeVO.class); + when(volumeVO2.getId()).thenReturn(VOLUME_ID_2); + when(volumeVO2.getPoolId()).thenReturn(POOL_ID_2); + when(volumeVO2.getPath()).thenReturn("volume-302.qcow2"); + when(volumeVO2.getVmSnapshotChainSize()).thenReturn(null); + when(volumeDao.findById(VOLUME_ID_2)).thenReturn(volumeVO2); + + Map poolDetails1 = new HashMap<>(); + poolDetails1.put(OntapStorageConstants.VOLUME_UUID, "flexvol-uuid-1"); + poolDetails1.put(OntapStorageConstants.USERNAME, "admin"); + poolDetails1.put(OntapStorageConstants.PASSWORD, "pass"); + poolDetails1.put(OntapStorageConstants.STORAGE_IP, "10.0.0.1"); + poolDetails1.put(OntapStorageConstants.SVM_NAME, "svm1"); + poolDetails1.put(OntapStorageConstants.SIZE, "107374182400"); + poolDetails1.put(OntapStorageConstants.PROTOCOL, "NFS3"); + when(storagePoolDetailsDao.listDetailsKeyPairs(POOL_ID_1)).thenReturn(poolDetails1); + + Map poolDetails2 = new HashMap<>(); + poolDetails2.put(OntapStorageConstants.VOLUME_UUID, "flexvol-uuid-2"); + poolDetails2.put(OntapStorageConstants.USERNAME, "admin"); + poolDetails2.put(OntapStorageConstants.PASSWORD, "pass"); + poolDetails2.put(OntapStorageConstants.STORAGE_IP, "10.0.0.1"); + poolDetails2.put(OntapStorageConstants.SVM_NAME, "svm1"); + poolDetails2.put(OntapStorageConstants.SIZE, "107374182400"); + poolDetails2.put(OntapStorageConstants.PROTOCOL, "NFS3"); + when(storagePoolDetailsDao.listDetailsKeyPairs(POOL_ID_2)).thenReturn(poolDetails2); + + VolumeInfo volumeInfo1 = mock(VolumeInfo.class); + when(volumeInfo1.getId()).thenReturn(VOLUME_ID_1); + when(volumeDataFactory.getVolume(VOLUME_ID_1)).thenReturn(volumeInfo1); + VolumeInfo volumeInfo2 = mock(VolumeInfo.class); + when(volumeInfo2.getId()).thenReturn(VOLUME_ID_2); + when(volumeDataFactory.getVolume(VOLUME_ID_2)).thenReturn(volumeInfo2); + } + + private JobResponse createJobResponse(String uuid) { + Job job = new Job(); + job.setUuid(uuid); + JobResponse response = new JobResponse(); + response.setJob(job); + return response; + } + + private void setupSingleFlexVolFlowMocks(String snapshotName) { + doReturn(storageStrategy).when(strategy).resolveStorageStrategy(any()); + when(storageStrategy.getSnapshotFeignClient()).thenReturn(snapshotFeignClient); + when(storageStrategy.getAuthHeader()).thenReturn("Basic dGVzdDp0ZXN0"); + when(storageStrategy.jobPollForSuccess(any(), anyInt(), anyInt())).thenReturn(true); + + when(snapshotFeignClient.createSnapshot(any(), eq("flexvol-uuid-1"), any())) + .thenReturn(createJobResponse("job-fv-snap")); + + OntapResponse flexVolSnapshots = new OntapResponse<>(); + FlexVolSnapshot flexVolSnapshot = new FlexVolSnapshot(); + flexVolSnapshot.setUuid("fv-snap-uuid-1"); + flexVolSnapshot.setName(snapshotName); + flexVolSnapshots.setRecords(Collections.singletonList(flexVolSnapshot)); + when(snapshotFeignClient.getSnapshots(any(), eq("flexvol-uuid-1"), any())) + .thenReturn(flexVolSnapshots); + } + + private void setupTemporaryCgFlowMocks(String snapshotName) { + doReturn(storageStrategy).when(strategy).resolveStorageStrategy(any()); + when(storageStrategy.getSnapshotFeignClient()).thenReturn(snapshotFeignClient); + when(storageStrategy.getAuthHeader()).thenReturn("Basic dGVzdDp0ZXN0"); + when(storageStrategy.jobPollForSuccess(any(), anyInt(), anyInt())).thenReturn(true); + + when(snapshotFeignClient.createConsistencyGroup(any(), any())).thenReturn(createJobResponse("job-cg-create")); + OntapResponse> cgResponse = new OntapResponse<>(); + Map cgRecord = new HashMap<>(); + cgRecord.put("uuid", "cg-uuid-1"); + cgResponse.setRecords(Collections.singletonList(cgRecord)); + when(snapshotFeignClient.getConsistencyGroups(any(), any())).thenReturn(cgResponse); + + when(snapshotFeignClient.createConsistencyGroupSnapshot(any(), eq("cg-uuid-1"), any())) + .thenReturn(createJobResponse("job-cg-start")); + OntapResponse> cgSnapshotResponse = new OntapResponse<>(); + Map cgSnapshotRecord = new HashMap<>(); + cgSnapshotRecord.put("uuid", "cg-snap-uuid-1"); + cgSnapshotRecord.put("name", snapshotName); + cgSnapshotResponse.setRecords(Collections.singletonList(cgSnapshotRecord)); + when(snapshotFeignClient.getConsistencyGroupSnapshots(any(), eq("cg-uuid-1"), any())) + .thenReturn(cgSnapshotResponse); + when(snapshotFeignClient.commitConsistencyGroupSnapshot(any(), eq("cg-uuid-1"), eq("cg-snap-uuid-1"), any())) + .thenReturn(createJobResponse("job-cg-commit")); + + when(snapshotFeignClient.deleteConsistencyGroup(any(), eq("cg-uuid-1"))) + .thenReturn(createJobResponse("job-cg-delete")); + + OntapResponse flexVolSnapshots = new OntapResponse<>(); + FlexVolSnapshot flexVolSnapshot = new FlexVolSnapshot(); + flexVolSnapshot.setUuid("fv-snap-uuid-1"); + flexVolSnapshot.setName(snapshotName); + flexVolSnapshots.setRecords(Collections.singletonList(flexVolSnapshot)); + when(snapshotFeignClient.getSnapshots(any(), eq("flexvol-uuid-1"), any())) + .thenReturn(flexVolSnapshots); + + OntapResponse flexVolSnapshots2 = new OntapResponse<>(); + FlexVolSnapshot flexVolSnapshot2 = new FlexVolSnapshot(); + flexVolSnapshot2.setUuid("fv-snap-uuid-2"); + flexVolSnapshot2.setName(snapshotName); + flexVolSnapshots2.setRecords(Collections.singletonList(flexVolSnapshot2)); + when(snapshotFeignClient.getSnapshots(any(), eq("flexvol-uuid-2"), any())) + .thenReturn(flexVolSnapshots2); + } } diff --git a/server/src/main/java/com/cloud/storage/snapshot/SnapshotManagerImpl.java b/server/src/main/java/com/cloud/storage/snapshot/SnapshotManagerImpl.java index ce01048eaa56..dd27aa1f15a5 100755 --- a/server/src/main/java/com/cloud/storage/snapshot/SnapshotManagerImpl.java +++ b/server/src/main/java/com/cloud/storage/snapshot/SnapshotManagerImpl.java @@ -1633,7 +1633,15 @@ public SnapshotInfo takeSnapshot(VolumeInfo volume) throws ResourceAllocationExc boolean isKvmAndFileBasedStorage = isHypervisorKvmAndFileBasedStorage(volume, storagePool); boolean backupSnapToSecondary = isBackupSnapshotToSecondaryForZone(volume.getDataCenterId()); - if (isKvmAndFileBasedStorage && backupSnapToSecondary) { + updateSnapshotPayload(volume.getPoolId(), payload, isKvmAndFileBasedStorage, clusterId); + + // Managed PRIMARY snapshots (ONTAP volume snapshots, etc.) remain on primary/array storage. + // They must not use secondary archive bookkeeping (postSnapshotDirectlyToSecondary) or a physical + // secondary copy — delete is handled via StorageSystemSnapshotStrategy → driver deleteAsync. + boolean archiveSnapshotToSecondary = backupSnapToSecondary + && !isManagedPrimaryLocationSnapshot(storagePool, payload); + + if (isKvmAndFileBasedStorage && archiveSnapshotToSecondary) { DataStore imageStore = snapshotSrv.findSnapshotImageStore(snapshot); if (imageStore == null) { throw new CloudRuntimeException(String.format("Could not find any secondary storage to allocate snapshot [%s].", snapshot)); @@ -1641,8 +1649,6 @@ public SnapshotInfo takeSnapshot(VolumeInfo volume) throws ResourceAllocationExc snapshot.setImageStore(imageStore); } - updateSnapshotPayload(volume.getPoolId(), payload, isKvmAndFileBasedStorage, clusterId); - snapshot.addPayload(payload); try { SnapshotStrategy snapshotStrategy = _storageStrategyFactory.getSnapshotStrategy(snapshot, SnapshotOperation.TAKE); @@ -1656,14 +1662,22 @@ public SnapshotInfo takeSnapshot(VolumeInfo volume) throws ResourceAllocationExc SnapshotInfo snapshotOnPrimary = snapshotStrategy.takeSnapshot(snapshot); - if (backupSnapToSecondary) { + if (archiveSnapshotToSecondary) { if (!isKvmAndFileBasedStorage) { backupSnapshotToSecondary(payload.getAsyncBackup(), snapshotStrategy, snapshotOnPrimary, payload.getZoneIds(), payload.getStoragePoolIds()); } else { postSnapshotDirectlyToSecondary(snapshot, snapshotOnPrimary, snapshotId); } } else { - logger.debug("Skipping backup of snapshot [{}] to secondary due to configuration [{}].", snapshotOnPrimary.getUuid(), SnapshotInfo.BackupSnapshotAfterTakingSnapshot.key()); + if (backupSnapToSecondary && isManagedPrimaryLocationSnapshot(storagePool, payload)) { + logger.info("takeSnapshot: snapshot [{}] on managed primary pool [{}] with locationType=PRIMARY — " + + "keeping snapshot on primary/array storage only; not archiving to secondary " + + "(backup.snapshot.after.take is ignored for this snapshot class)", + snapshotId, storagePool.getId()); + } else { + logger.debug("Skipping backup of snapshot [{}] to secondary due to configuration [{}].", + snapshotOnPrimary.getUuid(), SnapshotInfo.BackupSnapshotAfterTakingSnapshot.key()); + } if (CollectionUtils.isNotEmpty(payload.getStoragePoolIds()) && payload.getAsyncBackup()) { snapshotStrategy = _storageStrategyFactory.getSnapshotStrategy(snapshot, SnapshotOperation.COPY); @@ -1678,7 +1692,7 @@ public SnapshotInfo takeSnapshot(VolumeInfo volume) throws ResourceAllocationExc postCreateSnapshot(volume.getId(), snapshotId, payload.getSnapshotPolicyId(), clusterId); snapshotZoneDao.addSnapshotToZone(snapshotId, snapshot.getDataCenterId()); - DataStoreRole dataStoreRole = backupSnapToSecondary ? snapshotHelper.getDataStoreRole(snapshot) : DataStoreRole.Primary; + DataStoreRole dataStoreRole = archiveSnapshotToSecondary ? snapshotHelper.getDataStoreRole(snapshot) : DataStoreRole.Primary; List snapshotStoreRefs = _snapshotStoreDao.listReadyBySnapshot(snapshotId, dataStoreRole); if (CollectionUtils.isEmpty(snapshotStoreRefs)) { @@ -1693,7 +1707,7 @@ public SnapshotInfo takeSnapshot(VolumeInfo volume) throws ResourceAllocationExc _resourceLimitMgr.decrementResourceCount(snapshotOwner.getId(), storeResourceType, volume.getSize() - snapshotStoreRef.getPhysicalSize()); if (!payload.getAsyncBackup()) { - if (backupSnapToSecondary) { + if (archiveSnapshotToSecondary) { copyNewSnapshotToZones(snapshotId, snapshot.getDataCenterId(), payload.getZoneIds()); } if (CollectionUtils.isNotEmpty(payload.getStoragePoolIds())) { @@ -1723,6 +1737,12 @@ public SnapshotInfo takeSnapshot(VolumeInfo volume) throws ResourceAllocationExc return snapshot; } + /** + * KVM file-based fast-path: records snapshot on image store without copying bytes, then drops the + * primary {@code snapshot_data_store} row. Used only for non-managed primary storage when + * {@code backup.snapshot.after.take} is enabled. Managed PRIMARY snapshots (e.g. ONTAP) never + * call this method — see {@link #isManagedPrimaryLocationSnapshot}. + */ private void postSnapshotDirectlyToSecondary(SnapshotInfo snapshot, SnapshotInfo snapshotOnPrimary, Long snapshotId) { logger.debug("{} was directly copied to secondary storage because the hypervisor is KVM, the primary storage is file-based and the [{}] configuration" + " is set to true.", snapshot.getSnapshotVO().toString(), SnapshotInfo.BackupSnapshotAfterTakingSnapshot); @@ -1739,6 +1759,20 @@ private void postSnapshotDirectlyToSecondary(SnapshotInfo snapshot, SnapshotInfo snapshotDetailsDao.removeDetail(snapshotOnPrimary.getId(), AsyncJob.Constants.MS_ID); } + /** + * Returns true when a volume snapshot is explicitly kept on managed primary/array storage. + * + *

For managed pools, {@link #updateSnapshotPayload} defaults {@code locationType} to + * {@link Snapshot.LocationType#PRIMARY}. ONTAP volume snapshots therefore stay on the FlexVol + * on primary — they are not moved or mirrored to secondary storage. The primary + * {@code snapshot_data_store} row must remain so volume-snapshot DELETE uses + * {@code StorageSystemSnapshotStrategy} and the primary datastore driver.

+ */ + private boolean isManagedPrimaryLocationSnapshot(StoragePool storagePool, CreateSnapshotPayload payload) { + return storagePool != null && storagePool.isManaged() + && Snapshot.LocationType.PRIMARY.equals(payload.getLocationType()); + } + @Override public boolean isHypervisorKvmAndFileBasedStorage(VolumeInfo volumeInfo, StoragePool storagePool) { Set fileBasedStores = Set.of(Storage.StoragePoolType.SharedMountPoint, Storage.StoragePoolType.NetworkFilesystem, Storage.StoragePoolType.Filesystem);