Skip to content
Closed
  •  
  •  
  •  
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,12 @@ public abstract class AbstractVmInstance implements VmInstance {
APIDestroyVmInstanceMsg.class.getName(),
DestroyVmInstanceMsg.class.getName());

// Registering state: only the messages registered below are allowed —
// ChangeVmMetaDataMsg (for state transitions during registration) and the destroy messages (for cleanup/rollback).
allowedOperations.addState(VmInstanceState.Registering,
ChangeVmMetaDataMsg.class.getName(),
APIDestroyVmInstanceMsg.class.getName(),
DestroyVmInstanceMsg.class.getName());

stateChangeChecker.addState(VmInstanceStateEvent.unknown.toString(),
VmInstanceState.Created.toString(),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package org.zstack.compute.vm;

import org.springframework.beans.factory.annotation.Autowire;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Configurable;
import org.zstack.core.db.Q;
import org.zstack.header.core.Completion;
import org.zstack.header.core.workflow.FlowTrigger;
import org.zstack.header.core.workflow.NoRollbackFlow;
import org.zstack.header.errorcode.ErrorCode;
import org.zstack.header.vm.MetadataStorageHandler;
import org.zstack.header.vm.VmInstanceConstant;
import org.zstack.header.vm.VmInstanceSpec;
import org.zstack.header.volume.VolumeVO;
import org.zstack.header.volume.VolumeVO_;
import org.zstack.header.volume.VolumeType;
import org.zstack.utils.Utils;
import org.zstack.utils.logging.CLogger;

import java.util.Map;

/**
 * Removes a VM's metadata file from primary storage when the VM is expunged.
 *
 * <p>Design notes (Part 02b §8.3):</p>
 * <ul>
 * <li>Runs in the ExpungeVm flow chain, after the Root/Memory/Cache volumes are deleted.</li>
 * <li>The metadata location is resolved through the primary storage (PS) of the root volume.</li>
 * <li><b>Best-effort</b>: a failed deletion is only logged at WARN level and never blocks the
 * physical removal of the VM.</li>
 * <li>Dirty rows are removed by FK CASCADE and are not handled by this flow.</li>
 * </ul>
 *
 * <p>Timing (Δ-5): metadata is cleaned up at Expunge (physical deletion) rather than at Destroy
 * (soft deletion). A destroyed VM can still be recovered, so deleting the metadata that early
 * would leave the recovered VM without it.</p>
 *
 * <p>NOTE(review): the PS uuid is read from the root volume's {@code VolumeVO} row. If that row
 * no longer exists when this flow runs, the lookup yields {@code null} and the cleanup is
 * skipped — confirm this matches the intended position in the chain.</p>
 */
@Configurable(preConstruction = true, autowire = Autowire.BY_TYPE)
public class VmExpungeMetadataFlow extends NoRollbackFlow {
    private static final CLogger logger = Utils.getLogger(VmExpungeMetadataFlow.class);

    @Autowired
    private MetadataStorageHandler metadataStorageHandler;

    /**
     * Asks the metadata storage handler to delete the VM's metadata, then always advances the chain.
     *
     * @param trigger flow trigger; {@code next()} is invoked on every path handled here
     * @param data    flow data map; must hold the {@link VmInstanceSpec} under
     *                {@code VmInstanceConstant.Params.VmInstanceSpec}
     */
    @Override
    public void run(FlowTrigger trigger, Map data) {
        final String specKey = VmInstanceConstant.Params.VmInstanceSpec.toString();
        final VmInstanceSpec vmSpec = (VmInstanceSpec) data.get(specKey);
        final String vmUuid = vmSpec.getVmInventory().getUuid();

        // The VM_METADATA feature switch is deliberately NOT consulted: expunge is irreversible,
        // so leftover metadata is always cleaned up (best-effort), even when the feature is off.

        // The primary storage is located through the root volume.
        final String rootVolumeUuid = vmSpec.getVmInventory().getRootVolumeUuid();
        if (rootVolumeUuid == null) {
            // VM is in an intermediate state without a root volume: nothing to locate.
            skip(trigger, String.format(
                    "[MetadataExpunge] vm[uuid:%s] has no root volume, skipping metadata cleanup", vmUuid));
            return;
        }

        final String primaryStorageUuid = findPrimaryStorageUuid(rootVolumeUuid);
        if (primaryStorageUuid == null) {
            // Root volume row is gone or carries no PS information.
            skip(trigger, String.format(
                    "[MetadataExpunge] vm[uuid:%s] root volume[uuid:%s] has no primaryStorageUuid, skipping metadata cleanup",
                    vmUuid, rootVolumeUuid));
            return;
        }

        logger.info(String.format("[MetadataExpunge] deleting metadata for vm[uuid:%s] on ps[uuid:%s]",
                vmUuid, primaryStorageUuid));

        metadataStorageHandler.deleteMetadata(primaryStorageUuid, vmUuid, new Completion(trigger) {
            @Override
            public void success() {
                logger.info(String.format("[MetadataExpunge] metadata deleted for vm[uuid:%s] on ps[uuid:%s]",
                        vmUuid, primaryStorageUuid));
                trigger.next();
            }

            @Override
            public void fail(ErrorCode errorCode) {
                // Best-effort: a failed deletion must not block the physical removal of the VM.
                logger.warn(String.format(
                        "[MetadataExpunge] failed to delete metadata for vm[uuid:%s] on ps[uuid:%s], continuing expunge. Error: %s",
                        vmUuid, primaryStorageUuid, errorCode));
                trigger.next();
            }
        });
    }

    /** Reads the primaryStorageUuid column of the given volume; null when the query finds no value. */
    private String findPrimaryStorageUuid(String volumeUuid) {
        return Q.New(VolumeVO.class)
                .eq(VolumeVO_.uuid, volumeUuid)
                .select(VolumeVO_.primaryStorageUuid)
                .findValue();
    }

    /** Logs the reason at DEBUG level and moves on to the next flow without touching storage. */
    private void skip(FlowTrigger trigger, String reason) {
        logger.debug(reason);
        trigger.next();
    }
}
135 changes: 135 additions & 0 deletions compute/src/main/java/org/zstack/compute/vm/VmGlobalConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -133,4 +133,139 @@ public class VmGlobalConfig {
@GlobalConfigValidation(validValues = {"None", "AuthenticAMD"})
@BindResourceConfig(value = {VmInstanceVO.class})
public static GlobalConfig VM_CPUID_VENDOR = new GlobalConfig(CATEGORY, "vm.cpuid.vendor");

/**
 * Global config registered under the key "deletion.gcInterval", validated with
 * {@code numberGreaterThan = 1}.
 * NOTE(review): no @GlobalConfigDef is attached here, so the default value, type and unit are
 * not visible in this class — confirm where they are declared.
 */
@GlobalConfigValidation(numberGreaterThan = 1)
public static GlobalConfig GC_INTERVAL = new GlobalConfig(CATEGORY, "deletion.gcInterval");

/**
 * Switch for the VM metadata feature, registered under the key "vm.metadata"; only the values
 * "true" and "false" pass validation.
 * NOTE(review): no @GlobalConfigDef is attached here, so the default is not visible in this
 * class — confirm where it is declared.
 */
@GlobalConfigValidation(validValues = {"true", "false"})
public static GlobalConfig VM_METADATA = new GlobalConfig(CATEGORY, "vm.metadata");

// Tunables for the VM metadata feature. Each entry declares its default, value type and a
// description via @GlobalConfigDef; every numeric entry is validated with numberGreaterThan = 0.
// Time units differ between entries (seconds vs. minutes) — the unit is stated in each
// description and in the key suffix (Sec / Seconds / Minutes).

// --- Concurrency limits (per management node, "MN") ---
@GlobalConfigDef(defaultValue = "5", type = Integer.class,
description = "Max concurrent metadata writes per primary storage per MN")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_PS_MAX_CONCURRENT = new GlobalConfig(CATEGORY, "vm.metadata.ps.maxConcurrent");

@GlobalConfigDef(defaultValue = "10", type = Integer.class,
description = "Max concurrent VM metadata updates globally per MN")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_GLOBAL_MAX_CONCURRENT = new GlobalConfig(CATEGORY, "vm.metadata.global.maxConcurrent");

// --- Flush scheduling and retry limit ---
@GlobalConfigDef(defaultValue = "10", type = Integer.class,
description = "Initial GC delay in seconds after API success")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_GC_INITIAL_DELAY_SEC = new GlobalConfig(CATEGORY, "vm.metadata.gc.initialDelaySec");

@GlobalConfigDef(defaultValue = "5", type = Integer.class,
description = "Max retry count before giving up metadata flush")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_MAX_RETRY = new GlobalConfig(CATEGORY, "vm.metadata.maxRetry");

// --- Dirty-row poller ---
@GlobalConfigDef(defaultValue = "5", type = Long.class,
description = "Dirty poller interval in seconds")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_DIRTY_POLL_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.dirty.pollIntervalSec");

@GlobalConfigDef(defaultValue = "20", type = Integer.class,
description = "Max dirty rows to claim per poller cycle")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_DIRTY_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.dirty.batchSize");

// --- Path fingerprint check ---
@GlobalConfigDef(defaultValue = "300", type = Long.class,
description = "Path fingerprint check interval in seconds")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_PATH_CHECK_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.pathCheck.intervalSec");

@GlobalConfigDef(defaultValue = "500", type = Integer.class,
description = "Path fingerprint check keyset pagination batch size")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_PATH_CHECK_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.pathCheck.batchSize");

// --- Full refresh after upgrade ---
@GlobalConfigDef(defaultValue = "600", type = Long.class,
description = "Delay in seconds before full refresh after upgrade, waiting for rolling upgrade to complete")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_UPGRADE_REFRESH_DELAY = new GlobalConfig(CATEGORY, "vm.metadata.upgrade.refreshDelaySec");

@GlobalConfigDef(defaultValue = "1000", type = Integer.class,
description = "Upgrade full refresh SQL batch size")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_UPGRADE_REFRESH_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.upgrade.refreshBatchSize");

// --- Takeover after a management node leaves ---
@GlobalConfigDef(defaultValue = "5", type = Long.class,
description = "Delay in seconds after nodeLeft before takeover, reduces zombie MN race condition")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_NODE_LEFT_DELAY = new GlobalConfig(CATEGORY, "vm.metadata.nodeLeft.delaySec");

// --- MetadataStaleRecoveryTask ---
@GlobalConfigDef(defaultValue = "1800", type = Long.class,
description = "MetadataStaleRecoveryTask scan interval in seconds")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_STALE_RECOVERY_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.staleRecovery.intervalSec");

@GlobalConfigDef(defaultValue = "100", type = Integer.class,
description = "MetadataStaleRecoveryTask rows per scan batch")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_STALE_RECOVERY_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.staleRecovery.batchSize");

@GlobalConfigDef(defaultValue = "10", type = Integer.class,
description = "Max consecutive stale recovery cycles per VM before circuit-break")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_STALE_RECOVERY_MAX_CYCLES = new GlobalConfig(CATEGORY, "vm.metadata.staleRecovery.maxCycles");

// --- Pending API cleanup (unit: minutes) ---
@GlobalConfigDef(defaultValue = "45", type = Long.class,
description = "Pending API timeout cleanup threshold in minutes")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_PENDING_API_TIMEOUT = new GlobalConfig(CATEGORY, "vm.metadata.pendingApi.timeoutMinutes");

// --- Exponential backoff parameters ---
// NOTE(review): presumably used by the metadata flush retry path; the consumer is not visible here.
@GlobalConfigDef(defaultValue = "10", type = Integer.class,
description = "Exponential backoff base delay in seconds")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_RETRY_BASE_DELAY = new GlobalConfig(CATEGORY, "vm.metadata.retry.baseDelaySeconds");

@GlobalConfigDef(defaultValue = "10", type = Integer.class,
description = "Exponential backoff max exponent")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_RETRY_MAX_EXPONENT = new GlobalConfig(CATEGORY, "vm.metadata.retry.maxExponent");

// --- Initialization when the feature is switched from false to true ---
@GlobalConfigDef(defaultValue = "200", type = Integer.class,
description = "Batch size per round when enabling metadata (false to true init)")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_INIT_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.init.batchSize");

@GlobalConfigDef(defaultValue = "5", type = Long.class,
description = "Delay in seconds between init batches to prevent IO storm")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_INIT_BATCH_DELAY = new GlobalConfig(CATEGORY, "vm.metadata.init.batchDelaySec");

// --- Orphan metadata detection ---
@GlobalConfigDef(defaultValue = "3600", type = Long.class,
description = "Orphan metadata detection interval in seconds")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_ORPHAN_CHECK_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.orphanCheck.intervalSec");

// --- Claim age thresholds (unit: minutes) ---
// Three separate thresholds with defaults 15 (zombie release), 30 (background recovery)
// and 10 (inline takeover on the triggerFlushForVm path).
@GlobalConfigDef(defaultValue = "15", type = Long.class,
description = "Zombie claim threshold in minutes: claimed dirty rows older than this are released")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_ZOMBIE_CLAIM_THRESHOLD = new GlobalConfig(CATEGORY, "vm.metadata.zombieClaim.thresholdMinutes");

@GlobalConfigDef(defaultValue = "30", type = Long.class,
description = "Stale claim threshold in minutes for background recovery task")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_STALE_CLAIM_THRESHOLD = new GlobalConfig(CATEGORY, "vm.metadata.staleClaim.thresholdMinutes");

@GlobalConfigDef(defaultValue = "10", type = Long.class,
description = "Inline stale claim takeover threshold in minutes for triggerFlushForVm hot path")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_TRIGGER_FLUSH_STALE = new GlobalConfig(CATEGORY, "vm.metadata.triggerFlush.staleMinutes");

// --- deleteMetadata retry ---
// NOTE(review): the description names "ExpungeVmInstanceFlow"; confirm this is the intended
// flow name and that the flow performing deleteMetadata actually reads these two settings.
@GlobalConfigDef(defaultValue = "3", type = Integer.class,
description = "Max retry count for deleteMetadata in ExpungeVmInstanceFlow")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_DELETE_MAX_RETRY = new GlobalConfig(CATEGORY, "vm.metadata.delete.maxRetry");

@GlobalConfigDef(defaultValue = "30", type = Long.class,
description = "Base delay in seconds for deleteMetadata retry backoff")
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_DELETE_BASE_DELAY = new GlobalConfig(CATEGORY, "vm.metadata.delete.baseDelaySec");

// --- Internal bookkeeping ---
// String-typed, empty by default, and the only entry in this group without a
// @GlobalConfigValidation; per its description it is for internal use only.
@GlobalConfigDef(defaultValue = "", type = String.class,
description = "Last completed upgrade refresh version, prevents duplicate triggers across MNs. Internal use only")
public static GlobalConfig VM_METADATA_LAST_REFRESH_VERSION = new GlobalConfig(CATEGORY, "vm.metadata.lastRefreshVersion");
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.zstack.header.message.APIMessage;
import org.zstack.header.network.l2.*;
import org.zstack.header.network.l3.*;
import org.zstack.header.storage.primary.APIRegisterVmInstanceMsg;
import org.zstack.header.storage.primary.PrimaryStorageClusterRefVO;
import org.zstack.header.storage.primary.PrimaryStorageClusterRefVO_;
import org.zstack.header.storage.snapshot.VolumeSnapshotVO;
Expand Down
Loading