Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package org.zstack.compute.vm;

import org.springframework.beans.factory.annotation.Autowire;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Configurable;
import org.zstack.core.cloudbus.CloudBus;
import org.zstack.core.cloudbus.CloudBusCallBack;
import org.zstack.core.componentloader.PluginRegistry;
import org.zstack.core.db.Q;
import org.zstack.header.core.workflow.FlowTrigger;
import org.zstack.header.core.workflow.NoRollbackFlow;
import org.zstack.header.message.MessageReply;
import org.zstack.header.storage.primary.CleanupVmInstanceMetadataOnPrimaryStorageMsg;
import org.zstack.header.storage.primary.PrimaryStorageConstant;
import org.zstack.header.storage.primary.PrimaryStorageVO;
import org.zstack.header.storage.primary.PrimaryStorageVO_;
import org.zstack.header.vm.VmInstanceConstant;
import org.zstack.header.vm.VmInstanceSpec;
import org.zstack.header.vm.metadata.VmMetadataPathBuildExtensionPoint;
import org.zstack.header.volume.VolumeVO;
import org.zstack.header.volume.VolumeVO_;
import org.zstack.utils.Utils;
import org.zstack.utils.logging.CLogger;

import java.util.Map;

/**
 * Flow step that asks the primary storage holding a VM's root volume to clean up
 * the VM's metadata while the VM is being expunged.
 *
 * <p>The step advances the flow without sending anything when the
 * {@link VmInstanceSpec} or its inventory is missing, when the VM has no root volume,
 * when the root volume has no primary storage UUID, or when no
 * {@link VmMetadataPathBuildExtensionPoint} is registered for the storage type.
 * The outcome of the cleanup request is only logged; the flow advances either way.
 */
@Configurable(preConstruction = true, autowire = Autowire.BY_TYPE)
public class VmExpungeMetadataFlow extends NoRollbackFlow {
private static final CLogger logger = Utils.getLogger(VmExpungeMetadataFlow.class);

@Autowired
private CloudBus bus;
@Autowired
private PluginRegistry pluginRgty;

/**
 * Resolves the primary storage of the VM's root volume and sends it a
 * {@link CleanupVmInstanceMetadataOnPrimaryStorageMsg}.
 *
 * @param trigger flow trigger; {@code next()} is called on every path visible in this method
 * @param data    flow data map; the {@link VmInstanceSpec} is read from it under the key
 *                {@code VmInstanceConstant.Params.VmInstanceSpec.toString()}
 */
@Override
public void run(FlowTrigger trigger, Map data) {
final VmInstanceSpec spec = (VmInstanceSpec) data.get(VmInstanceConstant.Params.VmInstanceSpec.toString());
// Nothing can be resolved without the spec and its inventory: skip the cleanup.
if (spec == null || spec.getVmInventory() == null) {
logger.warn("[MetadataExpunge] missing VmInstanceSpec or VmInventory, skip metadata cleanup");
trigger.next();
return;
}

final String vmUuid = spec.getVmInventory().getUuid();

// The primary storage is looked up through the root volume, so a VM without one is skipped.
String rootVolumeUuid = spec.getVmInventory().getRootVolumeUuid();
if (rootVolumeUuid == null) {
logger.debug(String.format("[MetadataExpunge] vm[uuid:%s] has no root volume, skipping metadata cleanup", vmUuid));
trigger.next();
return;
}

// Read the primary storage UUID from the root volume's database record.
String psUuid = Q.New(VolumeVO.class).eq(VolumeVO_.uuid, rootVolumeUuid).select(VolumeVO_.primaryStorageUuid).findValue();
if (psUuid == null) {
logger.debug(String.format("[MetadataExpunge] vm[uuid:%s] root volume[uuid:%s] has no primaryStorageUuid, " +
"skipping metadata cleanup", vmUuid, rootVolumeUuid));
trigger.next();
return;
}

// The metadata path builder is selected by primary storage type.
// NOTE(review): psType is not null-checked before the lookup; presumably the
// PrimaryStorageVO row always exists when the volume references it -- confirm.
String psType = Q.New(PrimaryStorageVO.class).select(PrimaryStorageVO_.type).eq(PrimaryStorageVO_.uuid, psUuid).findValue();
VmMetadataPathBuildExtensionPoint ext = pluginRgty.getExtensionFromMap(psType, VmMetadataPathBuildExtensionPoint.class);
// No path builder for this storage type: skip silently (no log line is written here).
if (ext == null) {
trigger.next();
return;
}
String metadataPath = ext.buildVmMetadataPath(psUuid, vmUuid);

// NOTE(review): rootVolumeUuid is resolved above but is not set on the message;
// confirm whether any primary storage handler needs it to process the cleanup.
CleanupVmInstanceMetadataOnPrimaryStorageMsg cmsg = new CleanupVmInstanceMetadataOnPrimaryStorageMsg();
cmsg.setPrimaryStorageUuid(psUuid);
cmsg.setVmUuid(vmUuid);
cmsg.setMetadataPath(metadataPath);
Comment on lines +70 to +73
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# 检查 CleanupVmInstanceMetadataOnPrimaryStorageMsg 的处理逻辑是否使用 rootVolumeUuid
rg -n "getRootVolumeUuid|rootVolumeUuid" --type java -C 3 | head -60

Repository: MatheMatrix/zstack

Length of output: 7118


🏁 Script executed:

#!/bin/bash
# Find CleanupVmInstanceMetadataOnPrimaryStorageMsg class definition
fd "CleanupVmInstanceMetadataOnPrimaryStorageMsg" --type f --extension java

Repository: MatheMatrix/zstack

Length of output: 166


🏁 Script executed:

#!/bin/bash
# Search for the message class and its methods
rg "class CleanupVmInstanceMetadataOnPrimaryStorageMsg" -A 20 --type java

Repository: MatheMatrix/zstack

Length of output: 2882


🏁 Script executed:

#!/bin/bash
# Search for handlers/processors of this message
rg "CleanupVmInstanceMetadataOnPrimaryStorageMsg" --type java -B 2 -A 5

Repository: MatheMatrix/zstack

Length of output: 14118


🏁 Script executed:

#!/bin/bash
# Check the complete VmExpungeMetadataFlow.java around lines 40-75 to see if rootVolumeUuid is available
cat -n compute/src/main/java/org/zstack/compute/vm/VmExpungeMetadataFlow.java | sed -n '40,75p'

Repository: MatheMatrix/zstack

Length of output: 2062


设置 rootVolumeUuid 到消息中

LocalStorageBase.handle() 方法使用 msg.getRootVolumeUuid() 来确定处理主机,但当前代码未设置该字段。虽然 NFS 存储实现不需要此字段,但本地存储实现依赖它来解析主机。rootVolumeUuid 在第 47 行已获取,应在第 73 行后添加 cmsg.setRootVolumeUuid(rootVolumeUuid);

修复方案
 CleanupVmInstanceMetadataOnPrimaryStorageMsg cmsg = new CleanupVmInstanceMetadataOnPrimaryStorageMsg();
 cmsg.setPrimaryStorageUuid(psUuid);
 cmsg.setVmUuid(vmUuid);
 cmsg.setMetadataPath(metadataPath);
+cmsg.setRootVolumeUuid(rootVolumeUuid);
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
-CleanupVmInstanceMetadataOnPrimaryStorageMsg cmsg = new CleanupVmInstanceMetadataOnPrimaryStorageMsg();
-cmsg.setPrimaryStorageUuid(psUuid);
-cmsg.setVmUuid(vmUuid);
-cmsg.setMetadataPath(metadataPath);
+CleanupVmInstanceMetadataOnPrimaryStorageMsg cmsg = new CleanupVmInstanceMetadataOnPrimaryStorageMsg();
+cmsg.setPrimaryStorageUuid(psUuid);
+cmsg.setVmUuid(vmUuid);
+cmsg.setMetadataPath(metadataPath);
+cmsg.setRootVolumeUuid(rootVolumeUuid);
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@compute/src/main/java/org/zstack/compute/vm/VmExpungeMetadataFlow.java`
around lines 70 - 73, The CleanupVmInstanceMetadataOnPrimaryStorageMsg is
missing the rootVolumeUuid field required by LocalStorageBase.handle() which
uses msg.getRootVolumeUuid() to locate the host; after the existing
cmsg.setMetadataPath(metadataPath) call, set the rootVolumeUuid by invoking
cmsg.setRootVolumeUuid(rootVolumeUuid) so the message includes the root volume
UUID used by local storage handling.

// Address the message using the primary storage service id and the storage UUID, then send it.
bus.makeTargetServiceIdByResourceUuid(cmsg, PrimaryStorageConstant.SERVICE_ID, psUuid);
bus.send(cmsg, new CloudBusCallBack(trigger) {
@Override
public void run(MessageReply reply) {
if (reply.isSuccess()) {
logger.info(String.format("[MetadataExpunge] successfully deleted metadata for vm[uuid:%s] on ps[uuid:%s]",
vmUuid, psUuid));
} else {
// best-effort: do not fail the expunge flow, MetadataStorageOrphanDetector will clean up later
logger.warn(String.format("[MetadataExpunge] failed to delete metadata for vm[uuid:%s] on ps[uuid:%s]: %s",
vmUuid, psUuid, reply.getError()));
}
// The flow advances on both success and failure; the reply only affects logging.
trigger.next();
}
});
}
}
85 changes: 85 additions & 0 deletions compute/src/main/java/org/zstack/compute/vm/VmGlobalConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -133,4 +133,89 @@ public class VmGlobalConfig {
@GlobalConfigValidation(validValues = {"None", "AuthenticAMD"})
@BindResourceConfig(value = {VmInstanceVO.class})
public static GlobalConfig VM_CPUID_VENDOR = new GlobalConfig(CATEGORY, "vm.cpuid.vendor");



// ---- VM metadata global configs ----
// All entries below are registered under CATEGORY with a "vm.metadata..." name.
// Unless noted otherwise, each one is validated as a number greater than 0.
// NOTE(review): units appear to be encoded in the config names (Sec / Seconds / Minutes);
// confirm against the global config definitions.

// Validated to be either "true" or "false".
@GlobalConfigValidation(validValues = {"true", "false"})
public static GlobalConfig VM_METADATA = new GlobalConfig(CATEGORY, "vm.metadata");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_PS_MAX_CONCURRENT = new GlobalConfig(CATEGORY, "vm.metadata.ps.maxConcurrent");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_GLOBAL_MAX_CONCURRENT = new GlobalConfig(CATEGORY, "vm.metadata.global.maxConcurrent");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_GC_INITIAL_DELAY_SEC = new GlobalConfig(CATEGORY, "vm.metadata.gc.initialDelaySec");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_MAX_RETRY = new GlobalConfig(CATEGORY, "vm.metadata.maxRetry");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_DIRTY_POLL_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.dirty.pollIntervalSec");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_DIRTY_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.dirty.batchSize");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_UPGRADE_REFRESH_DELAY = new GlobalConfig(CATEGORY, "vm.metadata.upgrade.refreshDelaySec");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_UPGRADE_REFRESH_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.upgrade.refreshBatchSize");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_NODE_LEFT_DELAY = new GlobalConfig(CATEGORY, "vm.metadata.nodeLeft.delaySec");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_STALE_RECOVERY_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.staleRecovery.intervalSec");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_STALE_RECOVERY_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.staleRecovery.batchSize");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_STALE_RECOVERY_MAX_CYCLES = new GlobalConfig(CATEGORY, "vm.metadata.staleRecovery.maxCycles");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_PENDING_API_TIMEOUT = new GlobalConfig(CATEGORY, "vm.metadata.pendingApi.timeoutMinutes");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_RETRY_BASE_DELAY = new GlobalConfig(CATEGORY, "vm.metadata.retry.baseDelaySeconds");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_RETRY_MAX_EXPONENT = new GlobalConfig(CATEGORY, "vm.metadata.retry.maxExponent");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_INIT_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.init.batchSize");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_INIT_BATCH_DELAY = new GlobalConfig(CATEGORY, "vm.metadata.init.batchDelaySec");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_ORPHAN_CHECK_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.orphanCheck.intervalSec");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_ZOMBIE_CLAIM_THRESHOLD = new GlobalConfig(CATEGORY, "vm.metadata.zombieClaim.thresholdMinutes");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_STALE_CLAIM_THRESHOLD = new GlobalConfig(CATEGORY, "vm.metadata.staleClaim.thresholdMinutes");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_TRIGGER_FLUSH_STALE = new GlobalConfig(CATEGORY, "vm.metadata.triggerFlush.staleMinutes");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_DELETE_MAX_RETRY = new GlobalConfig(CATEGORY, "vm.metadata.delete.maxRetry");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_DELETE_BASE_DELAY = new GlobalConfig(CATEGORY, "vm.metadata.delete.baseDelaySec");

// The only entry in this group declared without a @GlobalConfigValidation annotation.
public static GlobalConfig VM_METADATA_LAST_REFRESH_VERSION = new GlobalConfig(CATEGORY, "vm.metadata.lastRefreshVersion");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_CONTENT_CHECK_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.contentCheck.intervalSec");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_CONTENT_CHECK_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.contentCheck.batchSize");

@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_STORAGE_ORPHAN_CHECK_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.storageOrphanCheck.intervalSec");
}
4 changes: 4 additions & 0 deletions compute/src/main/java/org/zstack/compute/vm/VmSystemTags.java
Original file line number Diff line number Diff line change
Expand Up @@ -307,4 +307,8 @@ public String desensitizeTag(SystemTag systemTag, String tag) {
}

public static PatternedSystemTag VM_STATE_PAUSED_AFTER_MIGRATE = new PatternedSystemTag(("vmPausedAfterMigrate"), VmInstanceVO.class);

// Name of the single token embedded in the VM_METADATA_REGISTERING_MN_UUID tag pattern.
public static String VM_METADATA_REGISTERING_MN_UUID_TOKEN = "registeringMnUuid";
// Patterned system tag on VmInstanceVO; the pattern expands to
// "vmMetadata::registeringMnUuid::{registeringMnUuid}".
// NOTE(review): presumably the token carries a management node UUID -- confirm with the tag's users.
public static PatternedSystemTag VM_METADATA_REGISTERING_MN_UUID = new PatternedSystemTag(
String.format("vmMetadata::registeringMnUuid::{%s}", VM_METADATA_REGISTERING_MN_UUID_TOKEN), VmInstanceVO.class);
}
24 changes: 24 additions & 0 deletions conf/db/upgrade/V5.0.0__schema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
-- VmMetadataDirtyVO: at most one row per VM (primary key is vmInstanceUuid).
-- The row is removed automatically when the referenced VmInstanceEO row is deleted
-- (ON DELETE CASCADE). managementNodeUuid is reset to NULL when the referenced
-- ManagementNodeVO row is deleted (ON DELETE SET NULL).
-- NOTE(review): the column names suggest per-VM dirty/claim/retry bookkeeping for
-- metadata flushing -- confirm against the entity class.
CREATE TABLE IF NOT EXISTS `zstack`.`VmMetadataDirtyVO` (
`vmInstanceUuid` VARCHAR(32) NOT NULL,
`managementNodeUuid` VARCHAR(32) DEFAULT NULL,
`dirtyVersion` BIGINT NOT NULL DEFAULT 1,
`lastClaimTime` TIMESTAMP NULL DEFAULT NULL,
`storageStructureChange` TINYINT(1) NOT NULL DEFAULT 0,
`retryCount` INT NOT NULL DEFAULT 0,
`nextRetryTime` TIMESTAMP NULL DEFAULT NULL,
`lastOpDate` timestamp on update CURRENT_TIMESTAMP,
`createDate` timestamp,
PRIMARY KEY (`vmInstanceUuid`),
CONSTRAINT `fkVmMetadataDirtyVOVmInstanceEO` FOREIGN KEY (`vmInstanceUuid`) REFERENCES `VmInstanceEO` (`uuid`) ON DELETE CASCADE,
CONSTRAINT `fkVmMetadataDirtyVOManagementNodeVO` FOREIGN KEY (`managementNodeUuid`) REFERENCES `ManagementNodeVO` (`uuid`) ON DELETE SET NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- VmMetadataFingerprintVO: at most one row per VM (primary key is vmInstanceUuid).
-- The row is removed automatically when the referenced VmInstanceEO row is deleted
-- (ON DELETE CASCADE). Unlike VmMetadataDirtyVO, this table declares no
-- lastOpDate/createDate columns.
-- NOTE(review): metadataSnapshot presumably stores the last flushed metadata content -- confirm.
CREATE TABLE IF NOT EXISTS `zstack`.`VmMetadataFingerprintVO` (
`vmInstanceUuid` VARCHAR(32) NOT NULL,
`metadataSnapshot` LONGTEXT,
`lastFlushTime` TIMESTAMP NULL DEFAULT NULL,
`lastFlushFailed` TINYINT(1) NOT NULL DEFAULT 0,
`staleRecoveryCount` INT NOT NULL DEFAULT 0,
PRIMARY KEY (`vmInstanceUuid`),
CONSTRAINT `fkVmMetadataFingerprintVOVmInstanceEO` FOREIGN KEY (`vmInstanceUuid`) REFERENCES `VmInstanceEO` (`uuid`) ON DELETE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
Loading