diff --git a/compute/src/main/java/org/zstack/compute/vm/VmInstanceBase.java b/compute/src/main/java/org/zstack/compute/vm/VmInstanceBase.java
index ed98aef34e0..66c3427b300 100755
--- a/compute/src/main/java/org/zstack/compute/vm/VmInstanceBase.java
+++ b/compute/src/main/java/org/zstack/compute/vm/VmInstanceBase.java
@@ -941,6 +941,34 @@ public void run(MessageReply re) {
         });
     }
 
+    /**
+     * ZSTAC-83890: pick a random Connected+Enabled+KVM sibling host (every occurrence of the suspect
+     * is excluded) to act as the pre-fence peer. Only the hinted siblings vetted by HaKvmHostSiblingChecker
+     * are considered. Returns null if none are usable; pre-fence MUST then refuse to start (split-brain).
+     */
+    private String pickFencePeerHostUuid(List<String> hintedSiblings, String suspectHostUuid) {
+        if (hintedSiblings == null || hintedSiblings.isEmpty()) {
+            return null;
+        }
+        List<String> filtered = new ArrayList<>(hintedSiblings);
+        filtered.removeAll(Collections.singleton(suspectHostUuid));
+        if (filtered.isEmpty()) {
+            return null;
+        }
+        List<String> alive = Q.New(HostVO.class)
+                .select(HostVO_.uuid)
+                .in(HostVO_.uuid, filtered)
+                .eq(HostVO_.status, HostStatus.Connected)
+                .eq(HostVO_.state, HostState.Enabled)
+                .eq(HostVO_.hypervisorType, VmInstanceConstant.KVM_HYPERVISOR_TYPE)
+                .listValues();
+        if (alive == null || alive.isEmpty()) {
+            return null;
+        }
+        Collections.shuffle(alive);
+        return alive.get(0);
+    }
+
     private void handle(final HaStartVmInstanceMsg msg) {
         thdf.chainSubmit(new ChainTask(msg) {
             @Override
@@ -952,7 +980,7 @@ public String getSyncSignature() {
             public void run(final SyncTaskChain chain) {
                 refreshVO();
 
-                HaStartVmJudger judger;
+                final HaStartVmJudger judger;
                 try {
                     Class clz = Class.forName(msg.getJudgerClassName());
                     judger = (HaStartVmJudger) clz.newInstance();
@@ -967,74 +995,157 @@ public void run(final SyncTaskChain chain) {
                     return;
                 }
 
-                // It is better to monitor HaStartVmInstanceMsg and HaStartVmInstanceReply,
-                // instead of intrusively recording the scheduling record here.
-                // The problem is, we have two early exits:
-                // 1. throwing exception;
-                // 2. judges no need to start VM.
-                // thus, with monitoring, there might be false records.
                 final VmSchedHistoryRecorder recorder = VmSchedHistoryRecorder.ofHA(msg.getVmInstanceUuid())
                         .withSchedReason(msg.getHaReason())
                         .begin();
 
-                ErrorCodeList errList = new ErrorCodeList();
-                new While<>(pluginRgty.getExtensionList(BeforeHaStartVmInstanceExtensionPoint.class)).each((ext, whileCompletion) -> {
-                    ext.beforeHaStartVmInstance(msg.getVmInstanceUuid(), msg.getJudgerClassName(), msg.getSoftAvoidHostUuids(), new Completion(msg) {
-                        @Override
-                        public void success() {
-                            whileCompletion.done();
-                        }
-                        @Override
-                        public void fail(ErrorCode errorCode) {
-                            errList.getCauses().add(errorCode);
-                            whileCompletion.done();
-                        }
-                    });
-                }).run(new WhileDoneCompletion(msg, chain) {
+                FlowChain fchain = FlowChainBuilder.newSimpleFlowChain();
+                fchain.setName(String.format("ha-start-vm-%s", msg.getVmInstanceUuid()));
+
+                fchain.then(new NoRollbackFlow() {
+                    String __name__ = "ha-pre-fence-vm";
+
                     @Override
-                    public void done(ErrorCodeList errorCodeList) {
-                        if (!errList.getCauses().isEmpty()) {
-                            reply.setError(errList.getCauses().get(0));
-                            bus.reply(msg, reply);
-                            recorder.withFailReason(reply.getError().getDetails())
-                                    .end(null);
-                            chain.next();
+                    public void run(FlowTrigger trigger, Map data) {
+                        // ZSTAC-83890: self.hostUuid is usually cleared by the prior
+                        // abnormal-lifecycle flow before HA-start runs; fall back to the
+                        // HA-vetted softAvoid suspect, then to lastHostUuid.
+                        String suspectHostUuid = self.getHostUuid();
+                        if (suspectHostUuid == null) {
+                            List<String> avoid = msg.getSoftAvoidHostUuids();
+                            if (avoid != null && !avoid.isEmpty()) {
+                                suspectHostUuid = avoid.get(0);
+                            }
+                        }
+                        if (suspectHostUuid == null) {
+                            suspectHostUuid = self.getLastHostUuid();
+                        }
+                        if (suspectHostUuid == null) {
+                            trigger.next();
+                            return;
+                        }
+                        List<String> siblings = msg.getSiblingHostUuids();
+                        if (siblings == null || siblings.isEmpty()) {
+                            trigger.next();
+                            return;
+                        }
+                        if (!VmInstanceConstant.KVM_HYPERVISOR_TYPE.equals(self.getHypervisorType())) {
+                            trigger.next();
+                            return;
+                        }
+                        // ZSTAC-83890: route to a Connected sibling KVMHost; never to the suspect itself.
+                        String peerHostUuid = pickFencePeerHostUuid(siblings, suspectHostUuid);
+                        if (peerHostUuid == null) {
+                            trigger.fail(operr("HA-start vm[%s]: no Connected sibling KVM host available to fence"
+                                    + " suspect host[%s]. Refuse to start to prevent split-brain.",
+                                    self.getUuid(), suspectHostUuid));
                             return;
                         }
+                        FenceVmOnHostMsg fmsg = new FenceVmOnHostMsg();
+                        fmsg.setHostUuid(peerHostUuid);
+                        fmsg.setSuspectHostUuid(suspectHostUuid);
+                        fmsg.setVmUuid(self.getUuid());
+                        bus.makeTargetServiceIdByResourceUuid(fmsg, HostConstant.SERVICE_ID, peerHostUuid);
+                        bus.send(fmsg, new CloudBusCallBack(trigger) {
+                            @Override
+                            public void run(MessageReply reply) {
+                                if (!reply.isSuccess()) {
+                                    trigger.fail(reply.getError());
+                                    return;
+                                }
+                                trigger.next();
+                            }
+                        });
+                    }
+                });
+
+                fchain.then(new NoRollbackFlow() {
+                    String __name__ = "before-ha-start-vm-instance";
+                    @Override
+                    public void run(FlowTrigger trigger, Map data) {
+                        ErrorCodeList errList = new ErrorCodeList();
+                        new While<>(pluginRgty.getExtensionList(BeforeHaStartVmInstanceExtensionPoint.class)).each((ext, wc) -> {
+                            ext.beforeHaStartVmInstance(msg.getVmInstanceUuid(), msg.getJudgerClassName(), msg.getSoftAvoidHostUuids(), new Completion(msg) {
+                                @Override
+                                public void success() {
+                                    wc.done();
+                                }
+
+                                @Override
+                                public void fail(ErrorCode errorCode) {
+                                    errList.getCauses().add(errorCode);
+                                    wc.done();
+                                }
+                            });
+                        }).run(new WhileDoneCompletion(trigger) {
+                            @Override
+                            public void done(ErrorCodeList errorCodeList) {
+                                if (!errList.getCauses().isEmpty()) {
+                                    trigger.fail(errList.getCauses().get(0));
+                                    return;
+                                }
+                                trigger.next();
+                            }
+                        });
+                    }
+                });
+
+                fchain.then(new NoRollbackFlow() {
+                    String __name__ = "mark-vm-stopped-and-save-last-host";
+
+                    @Override
+                    public void run(FlowTrigger trigger, Map data) {
 
                         logger.debug(String.format("HaStartVmJudger[%s] says the VM[uuid:%s, name:%s] is qualified for HA start, now we are starting it",
                                 judger.getClass(), self.getUuid(), self.getName()));
                         UpdateQuery sql = SQL.New(VmInstanceVO.class)
                                 .eq(VmInstanceVO_.uuid, self.getUuid())
                                 .set(VmInstanceVO_.state, VmInstanceState.Stopped)
                                 .set(VmInstanceVO_.hostUuid, null);
-
                         if (self.getHostUuid() != null) {
                             sql.set(VmInstanceVO_.lastHostUuid, self.getHostUuid());
                         }
-
                         sql.update();
+                        trigger.next();
+                    }
+                });
 
-                        startVm(msg, new Completion(msg, chain) {
+                fchain.then(new NoRollbackFlow() {
+                    String __name__ = "start-ha-vm";
+
+                    @Override
+                    public void run(FlowTrigger trigger, Map data) {
+                        startVm(msg, new Completion(trigger) {
                             @Override
                             public void success() {
                                 reply.setInventory(getSelfInventory());
-                                bus.reply(msg, reply);
-                                recorder.end(reply.getInventory().getHostUuid());
-                                chain.next();
+                                trigger.next();
                             }
 
                             @Override
                             public void fail(ErrorCode errorCode) {
-                                reply.setError(errorCode);
-                                bus.reply(msg, reply);
-                                recorder.withFailReason(errorCode.getDetails())
-                                        .end(null);
-                                chain.next();
+                                trigger.fail(errorCode);
                             }
                         });
                     }
                 });
+
+                fchain.done(new FlowDoneHandler(msg, chain) {
+                    @Override
+                    public void handle(Map data) {
+                        bus.reply(msg, reply);
+                        recorder.end(reply.getInventory() == null ? null : reply.getInventory().getHostUuid());
+                        chain.next();
+                    }
+                }).error(new FlowErrorHandler(msg, chain) {
+                    @Override
+                    public void handle(ErrorCode errCode, Map data) {
+                        reply.setError(errCode);
+                        bus.reply(msg, reply);
+                        recorder.withFailReason(errCode.getDetails()).end(null);
+                        chain.next();
+                    }
+                }).start();
             }
 
             @Override
diff --git a/header/src/main/java/org/zstack/header/vm/FenceVmOnHostMsg.java b/header/src/main/java/org/zstack/header/vm/FenceVmOnHostMsg.java
new file mode 100644
index 00000000000..95d3806a328
--- /dev/null
+++ b/header/src/main/java/org/zstack/header/vm/FenceVmOnHostMsg.java
@@ -0,0 +1,51 @@
+package org.zstack.header.vm;
+
+import org.zstack.header.host.HostMessage;
+import org.zstack.header.message.NeedReplyMessage;
+
+/**
+ * ZSTAC-83890 / TIC-5513
+ *
+ * Sent from {@code VmInstanceBase.handle(HaStartVmInstanceMsg)} during HA pre-fence. Routed to a
+ * vetted Connected KVM sibling host's KVMHost service (the "peer"). The peer is picked at the
+ * management node from {@code siblingHostUuids} (hinted by HA decision) so that this message never
+ * has to be delivered to the suspect host whose service is by definition unreliable / Disconnected.
+ *
+ * The peer's {@code KVMHost.handle(FenceVmOnHostMsg)} loads the suspect host's SSH credentials from
+ * its KVMHostVO and asks its local kvmagent to SSH the suspect and force-destroy any leftover qemu
+ * of {@code vmUuid}, so the new VM start during HA is safe against split-brain even if Ceph
+ * watchers were transiently emptied by an OSD watch_ping timeout.
+ */
+public class FenceVmOnHostMsg extends NeedReplyMessage implements HostMessage {
+    /** Routing target: the peer (sibling) KVM host that will execute the fence. */
+    private String hostUuid;
+    /** The suspect host whose qemu must be killed; supplied by the management node. */
+    private String suspectHostUuid;
+    /** UUID of the VM whose leftover qemu process must be fenced on the suspect host. */
+    private String vmUuid;
+
+    @Override
+    public String getHostUuid() {
+        return hostUuid;
+    }
+
+    public void setHostUuid(String hostUuid) {
+        this.hostUuid = hostUuid;
+    }
+
+    public String getSuspectHostUuid() {
+        return suspectHostUuid;
+    }
+
+    public void setSuspectHostUuid(String suspectHostUuid) {
+        this.suspectHostUuid = suspectHostUuid;
+    }
+
+    public String getVmUuid() {
+        return vmUuid;
+    }
+
+    public void setVmUuid(String vmUuid) {
+        this.vmUuid = vmUuid;
+    }
+}
diff --git a/header/src/main/java/org/zstack/header/vm/FenceVmOnHostReply.java b/header/src/main/java/org/zstack/header/vm/FenceVmOnHostReply.java
new file mode 100644
index 00000000000..83e43e78a6e
--- /dev/null
+++ b/header/src/main/java/org/zstack/header/vm/FenceVmOnHostReply.java
@@ -0,0 +1,14 @@
+package org.zstack.header.vm;
+
+import org.zstack.header.message.MessageReply;
+
+/**
+ * ZSTAC-83890 - reply to {@link FenceVmOnHostMsg}.
+ *
+ * If a sibling kvmagent confirmed the suspect qemu is gone (or could not even reach the suspect
+ * host), the reply is a plain success and HA-start is allowed to proceed. If the sibling reported
+ * that qemu is still alive on the suspect host, the reply is a failure with a descriptive error,
+ * and HA-start is refused to prevent split-brain.
+ */
+public class FenceVmOnHostReply extends MessageReply {
+}
diff --git a/header/src/main/java/org/zstack/header/vm/HaStartVmInstanceMsg.java b/header/src/main/java/org/zstack/header/vm/HaStartVmInstanceMsg.java
index a358249cdf9..918be23b9b5 100644
--- a/header/src/main/java/org/zstack/header/vm/HaStartVmInstanceMsg.java
+++ b/header/src/main/java/org/zstack/header/vm/HaStartVmInstanceMsg.java
@@ -13,6 +13,10 @@ public class HaStartVmInstanceMsg extends NeedReplyMessage implements VmInstance
     private String judgerClassName;
     private List<String> softAvoidHostUuids;
     private String haReason;
+    // ZSTAC-83890 - Sibling KVM hosts already picked during HA decision (e.g. by HaKvmHostSiblingChecker).
+    // The HA-start flow uses one of them to SSH-fence any leftover qemu on the suspect host before allowing
+    // the new VM to start, hardening against transient empty-watcher / stale-qemu split-brain.
+    private List<String> siblingHostUuids;
 
     public String getJudgerClassName() {
         return judgerClassName;
@@ -46,4 +50,12 @@ public String getHaReason() {
     public void setHaReason(String haReason) {
         this.haReason = haReason;
     }
+
+    public List<String> getSiblingHostUuids() {
+        return siblingHostUuids;
+    }
+
+    public void setSiblingHostUuids(List<String> siblingHostUuids) {
+        this.siblingHostUuids = siblingHostUuids;
+    }
 }
diff --git a/plugin/kvm/src/main/java/org/zstack/kvm/KVMAgentCommands.java b/plugin/kvm/src/main/java/org/zstack/kvm/KVMAgentCommands.java
index 91e5d885bd8..763c2cf57c1 100755
--- a/plugin/kvm/src/main/java/org/zstack/kvm/KVMAgentCommands.java
+++ b/plugin/kvm/src/main/java/org/zstack/kvm/KVMAgentCommands.java
@@ -5040,4 +5040,61 @@ public void setMemoryUsage(long memoryUsage) {
         }
     }
 
+    /**
+     * ZSTAC-83890 / TIC-5513
+     *
+     * Sent from MN to a sibling KVM host's kvmagent. The sibling SSHes to the suspect host
+     * (where the HA-started VM was running) and force-destroys any leftover qemu process
+     * before the new VM is allowed to start on a different host.
+     *
+     * Verdict is reported back via {@link FenceVmFromPeerRsp}.
+     */
+    public static class FenceVmFromPeerCmd extends AgentCommand implements java.io.Serializable {
+        @GrayVersion(value = "5.4.8")
+        public String vmUuid;
+
+        @GrayVersion(value = "5.4.8")
+        public String targetHostUuid;
+
+        @GrayVersion(value = "5.4.8")
+        public String targetHostIp;
+
+        @GrayVersion(value = "5.4.8")
+        public String targetHostUsername;
+
+        @GrayVersion(value = "5.4.8")
+        @NoLogging
+        public String targetHostPassword;
+
+        @GrayVersion(value = "5.4.8")
+        public Integer targetHostSshPort;
+
+        @GrayVersion(value = "5.4.8")
+        public Integer sshTimeoutSec;
+    }
+
+    /**
+     * ZSTAC-83890 / TIC-5513 - reply to {@link FenceVmFromPeerCmd}.
+     *
+     * Boxed Boolean fields are intentional: under @GrayVersion an older agent (< 5.4.8) returns no
+     * value here, and a primitive boolean would silently default to false, making "absent"
+     * indistinguishable from "explicit false". Consumers must treat null as "absent".
+     */
+    public static class FenceVmFromPeerRsp extends AgentResponse {
+        @GrayVersion(value = "5.4.8")
+        public Boolean qemuConfirmedDead;
+
+        @GrayVersion(value = "5.4.8")
+        public Boolean qemuStillAlive;
+
+        @GrayVersion(value = "5.4.8")
+        public Boolean targetHostUnreachable;
+
+        @GrayVersion(value = "5.4.8")
+        public String stdout;
+
+        @GrayVersion(value = "5.4.8")
+        public String stderr;
+    }
+
 }
diff --git a/plugin/kvm/src/main/java/org/zstack/kvm/KVMConstant.java b/plugin/kvm/src/main/java/org/zstack/kvm/KVMConstant.java
index f40157764ea..33fe199e15e 100755
--- a/plugin/kvm/src/main/java/org/zstack/kvm/KVMConstant.java
+++ b/plugin/kvm/src/main/java/org/zstack/kvm/KVMConstant.java
@@ -73,6 +73,8 @@ public interface KVMConstant {
     String KVM_LOGOUT_ISCSI_PATH = "/iscsi/target/logout";
     String KVM_LOGIN_ISCSI_PATH = "/iscsi/target/login";
     String KVM_HARDEN_CONSOLE_PATH = "/vm/console/harden";
+    /** ZSTAC-83890: sibling KVM host SSHes to suspect host and kills leftover qemu. */
+    String KVM_HA_FENCE_VM_FROM_PEER_PATH = "/ha/fence/vm/from/peer";
     String KVM_DELETE_CONSOLE_FIREWALL_PATH = "/vm/console/deletefirewall";
     String KVM_UPDATE_HOST_OS_PATH = "/host/updateos";
     String KVM_HOST_UPDATE_DEPENDENCY_PATH = "/host/updatedependency";
diff --git a/plugin/kvm/src/main/java/org/zstack/kvm/KVMHost.java b/plugin/kvm/src/main/java/org/zstack/kvm/KVMHost.java
index ab8c29640f3..ba664121e9a 100755
--- a/plugin/kvm/src/main/java/org/zstack/kvm/KVMHost.java
+++ b/plugin/kvm/src/main/java/org/zstack/kvm/KVMHost.java
@@ -600,6 +600,8 @@ protected void handleLocalMessage(Message msg) {
             handle((RebootVmOnHypervisorMsg) msg);
         } else if (msg instanceof DestroyVmOnHypervisorMsg) {
             handle((DestroyVmOnHypervisorMsg) msg);
+        } else if (msg instanceof FenceVmOnHostMsg) {
+            handle((FenceVmOnHostMsg) msg);
         } else if (msg instanceof AttachVolumeToVmOnHypervisorMsg) {
             handle((AttachVolumeToVmOnHypervisorMsg) msg);
         } else if (msg instanceof DetachVolumeFromVmOnHypervisorMsg) {
@@ -982,6 +984,110 @@ public void run(MessageReply reply) {
         });
     }
 
+    /**
+     * Executes the HA pre-fence on behalf of the management node. {@code self} is the peer host:
+     * its kvmagent is asked to fence the VM on {@code msg.getSuspectHostUuid()}. The reply succeeds
+     * only when the agent call succeeds and reports exactly one of "qemu confirmed dead" or
+     * "suspect unreachable"; a still-alive, failed or ambiguous verdict produces an error reply.
+     */
+    private void handle(final FenceVmOnHostMsg msg) {
+        final FenceVmOnHostReply reply = new FenceVmOnHostReply();
+        // ZSTAC-83890: this message is now routed by VmInstanceBase to the PEER (sibling) host's
+        // KVMHost service, not to the suspect's. So `self` here is the peer that will execute the
+        // fence, and the suspect is supplied by the caller via msg.getSuspectHostUuid().
+        final String peerHostUuid = self.getUuid();
+        final String suspectHostUuid = msg.getSuspectHostUuid();
+        if (suspectHostUuid == null || suspectHostUuid.equals(peerHostUuid)) {
+            reply.setError(operr("HA-start vm[%s]: invalid pre-fence routing -- suspectHostUuid=[%s], peer=[%s].",
+                    msg.getVmUuid(), suspectHostUuid, peerHostUuid));
+            bus.reply(msg, reply);
+            return;
+        }
+
+        KVMHostVO suspectVO = dbf.findByUuid(suspectHostUuid, KVMHostVO.class);
+        if (suspectVO == null) {
+            reply.setError(operr("HA-start vm[%s]: suspect KVM host[%s] no longer exists; refuse to start to prevent split-brain.",
+                    msg.getVmUuid(), suspectHostUuid));
+            bus.reply(msg, reply);
+            return;
+        }
+
+        FenceVmFromPeerCmd cmd = new FenceVmFromPeerCmd();
+        cmd.vmUuid = msg.getVmUuid();
+        cmd.targetHostUuid = suspectHostUuid;
+        cmd.targetHostIp = suspectVO.getManagementIp();
+        cmd.targetHostUsername = suspectVO.getUsername();
+        cmd.targetHostPassword = suspectVO.getPassword();
+        cmd.targetHostSshPort = suspectVO.getPort() != null ? suspectVO.getPort() : 22;
+        cmd.sshTimeoutSec = 20;
+
+        KVMHostAsyncHttpCallMsg fmsg = new KVMHostAsyncHttpCallMsg();
+        fmsg.setHostUuid(peerHostUuid);
+        fmsg.setPath(KVMConstant.KVM_HA_FENCE_VM_FROM_PEER_PATH);
+        fmsg.setCommand(cmd);
+        bus.makeTargetServiceIdByResourceUuid(fmsg, HostConstant.SERVICE_ID, peerHostUuid);
+
+        logger.info(String.format("[HA pre-fence] vm[%s] peer[%s] fencing suspect host[%s ip=%s]",
+                msg.getVmUuid(), peerHostUuid, suspectHostUuid, suspectVO.getManagementIp()));
+
+        bus.send(fmsg, new CloudBusCallBack(msg) {
+            @Override
+            public void run(MessageReply r) {
+                if (!r.isSuccess()) {
+                    // Sibling-side error details may carry remote command output / credentials -- log
+                    // the full error locally only and surface a redacted reason to upstream callers.
+                    logger.warn(String.format("[HA pre-fence] vm[%s] transport error to sibling[%s] fencing suspect host[%s]: %s",
+                            msg.getVmUuid(), peerHostUuid, suspectHostUuid, r.getError()));
+                    reply.setError(operr("HA-start vm[%s]: transport error asking sibling[%s] to fence suspect host[%s]. " +
+                                    "Refuse to start to prevent split-brain. See management log for details.",
+                            msg.getVmUuid(), peerHostUuid, suspectHostUuid));
+                    bus.reply(msg, reply);
+                    return;
+                }
+                FenceVmFromPeerRsp rsp = ((KVMHostAsyncHttpCallReply) r).toResponse(FenceVmFromPeerRsp.class);
+                boolean confirmedDead = Boolean.TRUE.equals(rsp.qemuConfirmedDead);
+                boolean stillAlive = Boolean.TRUE.equals(rsp.qemuStillAlive);
+                boolean unreachable = Boolean.TRUE.equals(rsp.targetHostUnreachable);
+                int verdicts = (confirmedDead ? 1 : 0) + (stillAlive ? 1 : 0) + (unreachable ? 1 : 0);
+
+                if (stillAlive) {
+                    // Raw rsp.stdout/stderr come from the suspect host shell and may include credentials
+                    // or sensitive paths -- keep them in the local log only, not in the error chain.
+                    logger.warn(String.format("[HA pre-fence] vm[%s] qemu still alive on suspect[%s] via sibling[%s]; " +
+                                    "agent-error=[%s] stdout-len=%d stderr-len=%d",
+                            msg.getVmUuid(), suspectHostUuid, peerHostUuid,
+                            rsp.getError(),
+                            rsp.stdout == null ? 0 : rsp.stdout.length(),
+                            rsp.stderr == null ? 0 : rsp.stderr.length()));
+                    reply.setError(operr("HA-start vm[%s]: qemu is still alive on suspect host[%s] after sibling[%s] " +
+                                    "force-destroy attempt. Refuse to start to prevent split-brain. " +
+                                    "See management log for fence agent output.",
+                            msg.getVmUuid(), suspectHostUuid, peerHostUuid));
+                    bus.reply(msg, reply);
+                    return;
+                }
+                if (!rsp.isSuccess() || verdicts != 1) {
+                    // Either the agent reported failure without a stillAlive verdict, or the gray-version
+                    // response is ambiguous (older agent returned all-null/all-false, or multiple verdicts
+                    // set). Be conservative: refuse and log full payload locally.
+                    logger.warn(String.format("[HA pre-fence] vm[%s] ambiguous fence verdict from sibling[%s] for suspect[%s]: " +
+                                    "success=%s confirmedDead=%s stillAlive=%s unreachable=%s agent-error=[%s]",
+                            msg.getVmUuid(), peerHostUuid, suspectHostUuid,
+                            rsp.isSuccess(), rsp.qemuConfirmedDead, rsp.qemuStillAlive, rsp.targetHostUnreachable,
+                            rsp.getError()));
+                    reply.setError(operr("HA-start vm[%s]: sibling[%s] returned an ambiguous fence verdict for suspect host[%s]. " +
+                                    "Refuse to start to prevent split-brain. See management log for details.",
+                            msg.getVmUuid(), peerHostUuid, suspectHostUuid));
+                    bus.reply(msg, reply);
+                    return;
+                }
+                logger.info(String.format("[HA pre-fence] vm[%s] cleared by sibling[%s]: confirmedDead=%s, targetUnreachable=%s",
+                        msg.getVmUuid(), peerHostUuid, confirmedDead, unreachable));
+                bus.reply(msg, reply);
+            }
+        });
+    }
+
     private void handle(CommitVolumeSnapshotOnHypervisorMsg msg) {
         inQueue().name(String.format("commit-volume-snapshot-on-kvm-%s", self.getUuid()))
                 .asyncBackup(msg)
diff --git a/simulator/simulatorImpl/src/main/java/org/zstack/simulator/kvm/KVMSimulatorController.java b/simulator/simulatorImpl/src/main/java/org/zstack/simulator/kvm/KVMSimulatorController.java
index 802fffbd702..b3f839635c0 100755
--- a/simulator/simulatorImpl/src/main/java/org/zstack/simulator/kvm/KVMSimulatorController.java
+++ b/simulator/simulatorImpl/src/main/java/org/zstack/simulator/kvm/KVMSimulatorController.java
@@ -754,6 +754,16 @@ private void getHostNuma(HttpEntity entity) {
 
     }
 
+    @RequestMapping(value = KVMConstant.KVM_HA_FENCE_VM_FROM_PEER_PATH, method = RequestMethod.POST)
+    public @ResponseBody String fenceVmFromPeer(HttpServletRequest req) {
+        HttpEntity<String> entity = restf.httpServletRequestToHttpEntity(req);
+        FenceVmFromPeerRsp rsp = new FenceVmFromPeerRsp();
+        rsp.qemuConfirmedDead = true;
+        rsp.setSuccess(true);
+        replyer.reply(entity, rsp);
+        return null;
+    }
+
     @ExceptionHandler(Exception.class)
     public ModelAndView handleAllException(Exception ex) {
         logger.warn(ex.getMessage(), ex);
diff --git a/testlib/src/main/java/org/zstack/testlib/KVMSimulator.groovy b/testlib/src/main/java/org/zstack/testlib/KVMSimulator.groovy
index 6e77c2dfeda..48627f21297 100755
--- a/testlib/src/main/java/org/zstack/testlib/KVMSimulator.groovy
+++ b/testlib/src/main/java/org/zstack/testlib/KVMSimulator.groovy
@@ -298,6 +298,16 @@ class KVMSimulator implements Simulator {
             return rsp
         }
 
+        // ZSTAC-83890 / TIC-5513: HA pre-fence default — peer reports qemu confirmed dead so the
+        // pre-fence flow lets HA-start proceed. Tests that want to assert the refusal path can
+        // override this stub via env.simulator(...) themselves.
+        spec.simulator(KVMConstant.KVM_HA_FENCE_VM_FROM_PEER_PATH) { HttpEntity<String> e ->
+            def rsp = new KVMAgentCommands.FenceVmFromPeerRsp()
+            rsp.qemuConfirmedDead = true
+            rsp.success = true
+            return rsp
+        }
+
         spec.simulator(KVMConstant.KVM_CONNECT_PATH) { HttpEntity<String> e ->
             Spec.checkHttpCallType(e, true)
             KVMAgentCommands.ConnectCmd cmd = JSONObjectUtil.toObject(e.body, KVMAgentCommands.ConnectCmd.class)