From 98dfa86c862b8a875227761c8f5f7549c3dfe91d Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Thu, 28 May 2026 12:34:22 -0700 Subject: [PATCH 01/15] adding proper lmcache support --- conf/experimental/ai_dynamo/test/sglang.toml | 30 +++++++--------- conf/experimental/ai_dynamo/test/vllm.toml | 34 ++++++++----------- src/cloudai/workloads/ai_dynamo/__init__.py | 2 ++ src/cloudai/workloads/ai_dynamo/ai_dynamo.py | 25 +++++++++++++- src/cloudai/workloads/ai_dynamo/ai_dynamo.sh | 25 +++++++++++++- .../ai_dynamo/slurm_command_gen_strategy.py | 24 +++++++++++-- .../test_command_gen_strategy_slurm.py | 32 +++++++++++++++++ 7 files changed, 130 insertions(+), 42 deletions(-) diff --git a/conf/experimental/ai_dynamo/test/sglang.toml b/conf/experimental/ai_dynamo/test/sglang.toml index 37b2c392b..e2d2460ac 100644 --- a/conf/experimental/ai_dynamo/test/sglang.toml +++ b/conf/experimental/ai_dynamo/test/sglang.toml @@ -22,6 +22,18 @@ extra_container_mounts = ["/run/udev:/run/udev", "/tmp:/tmp"] [cmd_args] docker_image_url = "nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.1.1" workloads = "aiperf.sh" +lmcache_config = ''' +chunk_size: 256 +local_cpu: true +max_local_cpu_size: 6.0 +nixl_buffer_size: 2079377920 +nixl_buffer_device: "cpu" +extra_config: + enable_nixl_storage: false + nixl_backend: "POSIX" + nixl_path: "/tmp/" + nixl_pool_size: 2048 +''' [cmd_args.dynamo] backend = "sglang" @@ -59,24 +71,6 @@ workloads = "aiperf.sh" host = "0.0.0.0" disaggregation-transfer-backend = "nixl" - [cmd_args.lmcache] - controller_cmd = "lmcache_controller --host localhost --port 9000 --monitor-port 9001" - - [cmd_args.lmcache.args] - chunk_size = 256 - local_cpu = false - nixl_buffer_size = 10737418240 - nixl_buffer_device = "cuda" - extra_config_enable_nixl_storage = true - extra_config_nixl_backend = "GDS_MT" - extra_config_nixl_file_pool_size = 64 - - enable_controller = true - lmcache_instance_id = "lmcache_default_instance" - controller_url = "localhost:9001" - lmcache_worker_port = 8788 - distributed_url = "localhost:8789" - [cmd_args.genai_perf] cmd = "genai-perf profile" extra-args = "--streaming --verbose -- -v --async" diff --git a/conf/experimental/ai_dynamo/test/vllm.toml b/conf/experimental/ai_dynamo/test/vllm.toml index 583d11a88..bb9748e08 100644 --- a/conf/experimental/ai_dynamo/test/vllm.toml +++ b/conf/experimental/ai_dynamo/test/vllm.toml @@ -22,6 +22,18 @@ extra_container_mounts = ["/run/udev:/run/udev", "/tmp:/tmp"] [cmd_args] docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.1.1" workloads = "aiperf.sh" +lmcache_config = ''' +chunk_size: 256 +local_cpu: true +max_local_cpu_size: 6.0 +nixl_buffer_size: 2079377920 +nixl_buffer_device: "cpu" +extra_config: + enable_nixl_storage: false + nixl_backend: "POSIX" + nixl_path: "/tmp/" + nixl_pool_size: 2048 +''' [cmd_args.dynamo] backend = "vllm" @@ -38,7 +50,7 @@ workloads = "aiperf.sh" tensor-parallel-size = 8 pipeline-parallel-size = 1 data-parallel-size = 1 - kv-transfer-config = '{"kv_connector":"NixlConnector","kv_role":"kv_both"}' + kv-transfer-config = '{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}' [cmd_args.dynamo.decode_worker] num-nodes = 1 @@ -51,25 +63,7 @@ workloads = "aiperf.sh" tensor-parallel-size = 8 pipeline-parallel-size = 1 data-parallel-size = 1 - kv-transfer-config = '{"kv_connector":"NixlConnector","kv_role":"kv_both"}' - - [cmd_args.lmcache] - controller_cmd = "lmcache_controller --host localhost --port 9000 --monitor-port 9001" - - [cmd_args.lmcache.args] - chunk_size = 256 - local_cpu = false - nixl_buffer_size = 10737418240 - nixl_buffer_device = "cuda" - extra_config_enable_nixl_storage = true - extra_config_nixl_backend = "GDS_MT" - extra_config_nixl_file_pool_size = 64 - - enable_controller = true - lmcache_instance_id = "lmcache_default_instance" - controller_url = "localhost:9001" - lmcache_worker_port = 8788 - distributed_url = "localhost:8789" + kv-transfer-config = '{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}' [cmd_args.genai_perf] cmd = "genai-perf profile" diff --git a/src/cloudai/workloads/ai_dynamo/__init__.py b/src/cloudai/workloads/ai_dynamo/__init__.py index 4aac3fd2c..6ce965fbb 100644 --- a/src/cloudai/workloads/ai_dynamo/__init__.py +++ b/src/cloudai/workloads/ai_dynamo/__init__.py @@ -15,6 +15,7 @@ # limitations under the License. from .ai_dynamo import ( + LMCACHE_CONFIG_FILE_NAME, AIDynamoArgs, AIDynamoCmdArgs, AIDynamoTestDefinition, @@ -31,6 +32,7 @@ from .slurm_command_gen_strategy import AIDynamoSlurmCommandGenStrategy __all__ = [ + "LMCACHE_CONFIG_FILE_NAME", "AIDynamoArgs", "AIDynamoCmdArgs", "AIDynamoKubernetesJsonGenStrategy", diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py index 35da5b782..5ea805147 100644 --- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py +++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py @@ -44,6 +44,7 @@ AIPERF_ARTIFACTS_DIR = "aiperf_artifacts" AIPERF_ACCURACY_ARTIFACTS_DIR = "aiperf_accuracy_artifacts" AIPERF_ACCURACY_RESULTS_CSV = "accuracy_results.csv" +LMCACHE_CONFIG_FILE_NAME = "lmcache-config.yaml" class Args(BaseModel): @@ -352,6 +353,18 @@ class AIDynamoCmdArgs(CmdArgs): docker_image_url: str storage_cache_dir: Optional[str | list[str]] = Field(default="/tmp", serialization_alias="storage_cache_dir") + lmcache_config_path: str | None = Field( + default=None, + validation_alias=AliasChoices("lmcache-config-path", "lmcache_config_path"), + serialization_alias="lmcache-config-path", + description="Path to an LMCache YAML config that is already available inside the container.", + ) + lmcache_config: str | None = Field( + default=None, + validation_alias=AliasChoices("lmcache-config", "lmcache_config"), + serialization_alias="lmcache-config", + description="Inline LMCache YAML config. CloudAI writes it to the run output and passes it to workers.", + ) dynamo: AIDynamoArgs lmcache: LMCache = Field(default_factory=LMCache) genai_perf: GenAIPerf = Field(default_factory=GenAIPerf) @@ -369,14 +382,24 @@ def validate_workloads(cls, v: str) -> str: raise ValueError(f"Invalid workload: {workload}. Available workloads: {allowed_workloads}") return ",".join(values) + @model_validator(mode="after") + def validate_lmcache_config(self) -> "AIDynamoCmdArgs": + if self.lmcache_config_path and self.lmcache_config: + raise ValueError("Only one of lmcache_config_path or lmcache_config can be set") + return self + @property def workloads_list(self) -> list[str]: return [w.strip() for w in self.workloads.split(",")] @property def installables(self) -> list[Installable]: + installables: list[Installable] = [] + if "lmcache" in self.model_fields_set: + installables.extend(self.lmcache.installables) + return [ - *self.lmcache.installables, + *installables, *self.genai_perf.installables, *self.aiperf.installables, *(self.aiperf_accuracy.installables if self.aiperf_accuracy else []), diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh b/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh index 5b65db41f..68c5ab266 100644 --- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh +++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh @@ -266,10 +266,17 @@ _has_connector() { [[ ",$prefill_connectors," == *",$needle,"* ]] || [[ ",$decode_connectors," == *",$needle,"* ]] } +_has_lmcache_config() { + [[ -n "${lmcache_config["config-path"]:-}" ]] +} + _apply_connector_settings() { - if _has_connector "lmcache"; then + if _has_connector "lmcache" || _has_lmcache_config; then export ENABLE_LMCACHE=1 fi + if _has_lmcache_config; then + export LMCACHE_CONFIG_FILE="${lmcache_config["config-path"]}" + fi if _has_connector "kvbm"; then export ENABLE_KVBM=1 fi @@ -860,6 +867,11 @@ function launch_lmcache_controller() return fi + if [[ -z "${lmcache_config["controller_cmd"]:-}" ]]; then + log "LMCache connector is set but no LMCache controller command is configured. Skipping controller launch." + return + fi + log "Launching LMCache controller with cmd: ${lmcache_config["controller_cmd"]}" ${lmcache_config["controller_cmd"]} > ${RESULTS_DIR}/lmcache_controller.log 2>&1 } @@ -965,6 +977,17 @@ function setup_lmcache() return fi + if _has_lmcache_config; then + log "Using explicit LMCache config file: ${lmcache_config["config-path"]}" + setup_cufile + return + fi + + if [[ -z "${lmcache_config["repo"]:-}" ]]; then + log "LMCache connector is set but no generated LMCache config is configured. Skipping setup_lmcache" + return + fi + _require_cmd uv local lmcache_path="${lmcache_config["repo"]}" log "Setting up LMCache; installing LMCache using: uv pip install $lmcache_path" diff --git a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py index c1a817853..e1f33dcec 100644 --- a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py +++ b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py @@ -24,7 +24,7 @@ from cloudai.core import File, GitRepo from cloudai.systems.slurm import SlurmCommandGenStrategy -from .ai_dynamo import AIDynamoTestDefinition +from .ai_dynamo import LMCACHE_CONFIG_FILE_NAME, AIDynamoTestDefinition class AIDynamoSlurmCommandGenStrategy(SlurmCommandGenStrategy): @@ -87,6 +87,21 @@ def _get_nested_toml_args(self, base_model: BaseModel, prefix: str) -> List[str] return result + def _prepare_lmcache_config(self, td: AIDynamoTestDefinition) -> str | None: + if td.cmd_args.lmcache_config_path: + return td.cmd_args.lmcache_config_path + + if td.cmd_args.lmcache_config is None: + return None + + self.test_run.output_path.mkdir(parents=True, exist_ok=True) + config_path = self.test_run.output_path / LMCACHE_CONFIG_FILE_NAME + config_path.write_text(td.cmd_args.lmcache_config) + return f"{self.CONTAINER_MOUNT_OUTPUT}/{LMCACHE_CONFIG_FILE_NAME}" + + def _should_emit_lmcache_args(self, td: AIDynamoTestDefinition) -> bool: + return "lmcache" in td.cmd_args.model_fields_set + def _gen_script_args(self, td: AIDynamoTestDefinition) -> List[str]: assert td.repo.installed_path args = [ @@ -103,6 +118,9 @@ def _gen_script_args(self, td: AIDynamoTestDefinition) -> List[str]: if td.cmd_args.storage_cache_dir: args.append(f"--storage-cache-dir {td.cmd_args.storage_cache_dir}") + if lmcache_config_path := self._prepare_lmcache_config(td): + args.append(f"--lmcache-config-path {shlex.quote(lmcache_config_path)}") + args.extend( self._get_toml_args( td.cmd_args.dynamo, @@ -118,7 +136,9 @@ def _gen_script_args(self, td: AIDynamoTestDefinition) -> List[str]: args.extend(self._get_nested_toml_args(td.cmd_args.dynamo.prefill_worker, "--prefill-")) args.extend(self._get_nested_toml_args(td.cmd_args.dynamo.decode_worker, "--decode-")) - args.extend(self._get_nested_toml_args(td.cmd_args.lmcache, "--lmcache-")) + if self._should_emit_lmcache_args(td): + args.extend(self._get_nested_toml_args(td.cmd_args.lmcache, "--lmcache-")) + args.extend(self._get_nested_toml_args(td.cmd_args.genai_perf, "--genai_perf-")) args.extend(self._get_nested_toml_args(td.cmd_args.aiperf, "--aiperf-")) if td.cmd_args.aiperf_accuracy is not None: diff --git a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py index 7b036b5a8..2c6d8bc0f 100644 --- a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py +++ b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py @@ -24,6 +24,7 @@ from cloudai.core import GitRepo from cloudai.systems.slurm import SlurmSystem from cloudai.workloads.ai_dynamo import ( + LMCACHE_CONFIG_FILE_NAME, AIDynamoArgs, AIDynamoCmdArgs, AIDynamoSlurmCommandGenStrategy, @@ -227,3 +228,34 @@ def test_gen_script_args_quotes_worker_json_args(strategy: AIDynamoSlurmCommandG assert f"--prefill-args-kv-transfer-config '{config}'" in result assert f"--decode-args-kv-transfer-config '{config}'" in result + + +def test_gen_script_args_writes_inline_lmcache_config(strategy: AIDynamoSlurmCommandGenStrategy) -> None: + td = cast(AIDynamoTestDefinition, strategy.test_run.test) + td.cmd_args.lmcache_config = "chunk_size: 256\nlocal_cpu: true\n" + + result = strategy._gen_script_args(td) + + config_path = strategy.test_run.output_path / LMCACHE_CONFIG_FILE_NAME + assert f"--lmcache-config-path {strategy.CONTAINER_MOUNT_OUTPUT}/{LMCACHE_CONFIG_FILE_NAME}" in result + assert config_path.read_text() == td.cmd_args.lmcache_config + + +def test_gen_script_args_uses_container_lmcache_config_path(strategy: AIDynamoSlurmCommandGenStrategy) -> None: + td = cast(AIDynamoTestDefinition, strategy.test_run.test) + td.cmd_args.lmcache_config_path = "/opt/shared/lmcache/config.yaml" + + result = strategy._gen_script_args(td) + + assert "--lmcache-config-path /opt/shared/lmcache/config.yaml" in result + assert not (strategy.test_run.output_path / LMCACHE_CONFIG_FILE_NAME).exists() + + +def test_lmcache_config_path_and_inline_config_are_mutually_exclusive() -> None: + with pytest.raises(ValueError, match="Only one of lmcache_config_path or lmcache_config"): + AIDynamoCmdArgs( + docker_image_url="url", + dynamo=AIDynamoArgs(), + lmcache_config_path="/opt/shared/lmcache/config.yaml", + lmcache_config="chunk_size: 256\n", + ) From 96faa4674d4c678d77bcf402a7040e2a36d7ebec Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Thu, 28 May 2026 13:01:26 -0700 Subject: [PATCH 02/15] lmcache fix --- conf/experimental/ai_dynamo/test/vllm.toml | 3 +- src/cloudai/workloads/ai_dynamo/ai_dynamo.py | 33 ++++++++++++------- src/cloudai/workloads/ai_dynamo/ai_dynamo.sh | 22 ++++++++++--- .../test_command_gen_strategy_slurm.py | 13 ++++++++ 4 files changed, 53 insertions(+), 18 deletions(-) diff --git a/conf/experimental/ai_dynamo/test/vllm.toml b/conf/experimental/ai_dynamo/test/vllm.toml index bb9748e08..9f0b89e93 100644 --- a/conf/experimental/ai_dynamo/test/vllm.toml +++ b/conf/experimental/ai_dynamo/test/vllm.toml @@ -43,6 +43,7 @@ extra_config: num-nodes = 1 cmd = 'python3 -m dynamo.vllm --is-prefill-worker' worker-initialized-regex = 'VllmWorker.*has.been.initialized' + connector = "['lmcache', 'nixl']" extra-args = "--no-enable-expert-parallel" [cmd_args.dynamo.prefill_worker.args] @@ -50,7 +51,6 @@ extra_config: tensor-parallel-size = 8 pipeline-parallel-size = 1 data-parallel-size = 1 - kv-transfer-config = '{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}' [cmd_args.dynamo.decode_worker] num-nodes = 1 @@ -63,7 +63,6 @@ extra_config: tensor-parallel-size = 8 pipeline-parallel-size = 1 data-parallel-size = 1 - kv-transfer-config = '{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}' [cmd_args.genai_perf] cmd = "genai-perf profile" diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py index 5ea805147..60fba63e4 100644 --- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py +++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py @@ -45,6 +45,20 @@ AIPERF_ACCURACY_ARTIFACTS_DIR = "aiperf_accuracy_artifacts" AIPERF_ACCURACY_RESULTS_CSV = "accuracy_results.csv" LMCACHE_CONFIG_FILE_NAME = "lmcache-config.yaml" +ALLOWED_CONNECTORS = ["kvbm", "lmcache", "nixl", "none"] + + +def validate_connector_value(v: str | list[str] | None) -> str | list[str] | None: + if v is None: + return v + + values = v if isinstance(v, list) else v.replace("[", "").replace("]", "").replace(",", " ").split() + values = [str(c).strip().strip("'\"") for c in values] + + for connector in values: + if connector not in ALLOWED_CONNECTORS: + raise ValueError(f"Invalid connector: {connector}. Available connectors: {ALLOWED_CONNECTORS}") + return v class Args(BaseModel): @@ -129,6 +143,12 @@ class WorkerConfig(BaseModel): default=1, serialization_alias="num-nodes", validation_alias=AliasChoices("num-nodes", "num_nodes") ) nodes: str | None = Field(default=None) + connector: Optional[str | list[str]] = None + + @field_validator("connector", mode="before") + @classmethod + def validate_connector(cls, v: str | list[str] | None) -> str | list[str] | None: + return validate_connector_value(v) args: WorkerBaseArgs = Field(default_factory=WorkerBaseArgs) @@ -152,18 +172,7 @@ class AIDynamoArgs(BaseModel): @field_validator("connector", mode="before") @classmethod def validate_connector(cls, v: str | list[str] | None) -> str | list[str] | None: - if v is None: - return v - allowed_connectors = ["kvbm", "lmcache", "nixl", "none"] - - # Connectors can be either a single string or a space-separated list. - values = v if isinstance(v, str) else " ".join(v) - values = [c.strip() for c in values.split(" ")] - - for connector in values: - if connector not in allowed_connectors: - raise ValueError(f"Invalid connector: {connector}. Available connectors: {allowed_connectors}") - return v + return validate_connector_value(v) workspace_path: str = Field( default="/workspace", diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh b/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh index 68c5ab266..46661aa3a 100644 --- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh +++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh @@ -259,11 +259,15 @@ _set_nodelists() } _has_connector() { - # Check if a specific connector is in the comma-separated connector list. + # Check if a specific connector is in a comma/space/Python-list connector value. local needle="$1" - local prefill_connectors="${prefill_args["--connector"]:-}" - local decode_connectors="${decode_args["--connector"]:-}" - [[ ",$prefill_connectors," == *",$needle,"* ]] || [[ ",$decode_connectors," == *",$needle,"* ]] + local connectors="${prefill_args["--connector"]:-} ${decode_args["--connector"]:-}" + connectors="${connectors//[/ }" + connectors="${connectors//]/ }" + connectors="${connectors//,/ }" + connectors="${connectors//\'/ }" + connectors="${connectors//\"/ }" + [[ " ${connectors} " == *" ${needle} "* ]] } _has_lmcache_config() { @@ -402,6 +406,16 @@ function array_to_args() for key in "${!arr[@]}"; do shopt -s nocasematch val="${arr[$key]}" + # Handle Python-style list values: ['a', 'b'] -> --key a b + if [[ "$val" =~ ^\[.*\]$ ]]; then + local list_val="${val#[}" + list_val="${list_val%]}" + list_val="${list_val//,/ }" + list_val="${list_val//\'/}" + list_val="${list_val//\"/}" + result+="${key} ${list_val} " + continue + fi # Quote values that contain spaces if [[ "$val" == *" "* ]]; then val="${val//\"/\\\"}" # Escape existing quotes diff --git a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py index 2c6d8bc0f..061a07ab2 100644 --- a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py +++ b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py @@ -230,6 +230,19 @@ def test_gen_script_args_quotes_worker_json_args(strategy: AIDynamoSlurmCommandG assert f"--decode-args-kv-transfer-config '{config}'" in result +def test_gen_script_args_contains_prefill_connector_list(strategy: AIDynamoSlurmCommandGenStrategy) -> None: + td = cast(AIDynamoTestDefinition, strategy.test_run.test) + td.cmd_args.dynamo.prefill_worker.connector = ["lmcache", "nixl"] + + result = strategy._gen_script_args(td) + command = " ".join(result) + + assert "--prefill-connector" in command + assert "lmcache" in command + assert "nixl" in command + assert "--decode-connector" not in command + + def test_gen_script_args_writes_inline_lmcache_config(strategy: AIDynamoSlurmCommandGenStrategy) -> None: td = cast(AIDynamoTestDefinition, strategy.test_run.test) td.cmd_args.lmcache_config = "chunk_size: 256\nlocal_cpu: true\n" From a39e3cbac0086bcf4c4593345cb1f659c3fb3f2d Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Thu, 28 May 2026 14:29:45 -0700 Subject: [PATCH 03/15] prepare pr --- conf/experimental/ai_dynamo/test/sglang.toml | 14 +-- conf/experimental/ai_dynamo/test/vllm.toml | 15 +--- .../ai_dynamo/test_scenario/vllm_lmcache.toml | 86 +++++++++++++++++++ doc/workloads/ai_dynamo.rst | 56 ++++++++++++ src/cloudai/workloads/ai_dynamo/ai_dynamo.py | 33 +++---- src/cloudai/workloads/ai_dynamo/ai_dynamo.sh | 10 --- .../test_command_gen_strategy_slurm.py | 13 --- 7 files changed, 156 insertions(+), 71 deletions(-) create mode 100644 conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml diff --git a/conf/experimental/ai_dynamo/test/sglang.toml b/conf/experimental/ai_dynamo/test/sglang.toml index e2d2460ac..1d8b80a8f 100644 --- a/conf/experimental/ai_dynamo/test/sglang.toml +++ b/conf/experimental/ai_dynamo/test/sglang.toml @@ -17,23 +17,11 @@ name = "sglang" description = "sglang backend" test_template_name = "AIDynamo" -extra_container_mounts = ["/run/udev:/run/udev", "/tmp:/tmp"] +extra_container_mounts = ["/run/udev:/run/udev"] [cmd_args] docker_image_url = "nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.1.1" workloads = "aiperf.sh" -lmcache_config = ''' -chunk_size: 256 -local_cpu: true -max_local_cpu_size: 6.0 -nixl_buffer_size: 2079377920 -nixl_buffer_device: "cpu" -extra_config: - enable_nixl_storage: false - nixl_backend: "POSIX" - nixl_path: "/tmp/" - nixl_pool_size: 2048 -''' [cmd_args.dynamo] backend = "sglang" diff --git a/conf/experimental/ai_dynamo/test/vllm.toml b/conf/experimental/ai_dynamo/test/vllm.toml index 9f0b89e93..5f609e142 100644 --- a/conf/experimental/ai_dynamo/test/vllm.toml +++ b/conf/experimental/ai_dynamo/test/vllm.toml @@ -17,23 +17,11 @@ name = "vLLM" description = "vLLM backend" test_template_name = "AIDynamo" -extra_container_mounts = ["/run/udev:/run/udev", "/tmp:/tmp"] +extra_container_mounts = ["/run/udev:/run/udev"] [cmd_args] docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.1.1" workloads = "aiperf.sh" -lmcache_config = ''' -chunk_size: 256 -local_cpu: true -max_local_cpu_size: 6.0 -nixl_buffer_size: 2079377920 -nixl_buffer_device: "cpu" -extra_config: - enable_nixl_storage: false - nixl_backend: "POSIX" - nixl_path: "/tmp/" - nixl_pool_size: 2048 -''' [cmd_args.dynamo] backend = "vllm" @@ -43,7 +31,6 @@ extra_config: num-nodes = 1 cmd = 'python3 -m dynamo.vllm --is-prefill-worker' worker-initialized-regex = 'VllmWorker.*has.been.initialized' - connector = "['lmcache', 'nixl']" extra-args = "--no-enable-expert-parallel" [cmd_args.dynamo.prefill_worker.args] diff --git a/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml new file mode 100644 index 000000000..fda79b0c0 --- /dev/null +++ b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml @@ -0,0 +1,86 @@ +# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name = "dynamo-vllm-lmcache" +job_status_check = false + +[[Tests]] +id = "test.disagg.single-node.lmcache-config" +name = "vLLM LMCache config propagation" +description = "Self-contained AIDynamo scenario showing inline LMCache YAML propagation to vLLM workers." +test_template_name = "AIDynamo" +time_limit = "00:10:00" +extra_container_mounts = ["/run/udev:/run/udev", "/tmp:/tmp"] + + [Tests.cmd_args] + docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.1.1" + workloads = "aiperf.sh" + lmcache_config = ''' +chunk_size: 256 +local_cpu: true +max_local_cpu_size: 6.0 +nixl_buffer_size: 2079377920 +nixl_buffer_device: "cpu" +extra_config: + enable_nixl_storage: false + nixl_backend: "POSIX" + nixl_path: "/tmp/" + nixl_pool_size: 2048 +''' + + [Tests.cmd_args.dynamo] + backend = "vllm" + model = "Qwen/Qwen3-0.6B" + + [Tests.cmd_args.dynamo.prefill_worker] + num-nodes = 1 + cmd = 'python3 -m dynamo.vllm --is-prefill-worker' + worker-initialized-regex = 'VllmWorker.*has.been.initialized' + extra-args = "--no-enable-expert-parallel" + + [Tests.cmd_args.dynamo.prefill_worker.args] + gpu-memory-utilization = 0.8 + tensor-parallel-size = 4 + pipeline-parallel-size = 1 + data-parallel-size = 1 + + [Tests.cmd_args.dynamo.decode_worker] + num-nodes = 1 + cmd = 'python3 -m dynamo.vllm' + worker-initialized-regex = 'VllmWorker.*has.been.initialized' + extra-args = "--no-enable-expert-parallel" + + [Tests.cmd_args.dynamo.decode_worker.args] + gpu-memory-utilization = 0.8 + tensor-parallel-size = 4 + pipeline-parallel-size = 1 + data-parallel-size = 1 + + [Tests.cmd_args.aiperf] + [Tests.cmd_args.aiperf.args] + concurrency = 2 + extra-inputs = '{"min_tokens":10}' + output-tokens-mean = 500 + request-count = 50 + synthetic-input-tokens-mean = 300 + + [Tests.extra_env_vars] + UCX_LOG_LEVEL = "warn" + HF_HUB_OFFLINE = "0" + TRANSFORMERS_OFFLINE = "0" + HF_DATASETS_OFFLINE = "0" + DYNAMO_NODELIST = "$(scontrol show hostname $SLURM_JOB_NODELIST | tr -s '\\n' ',')" + UCX_TLS = "all" diff --git a/doc/workloads/ai_dynamo.rst b/doc/workloads/ai_dynamo.rst index c00449681..de3645631 100644 --- a/doc/workloads/ai_dynamo.rst +++ b/doc/workloads/ai_dynamo.rst @@ -110,6 +110,61 @@ To use genai-perf, set: output-tokens-mean = 500 request-count = 50 +Propagating LMCache Configuration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +AIDynamo can pass an LMCache YAML config to the worker processes by setting ``LMCACHE_CONFIG_FILE`` inside the +container. This only propagates the LMCache configuration; the vLLM/SGLang runtime still needs to be launched with the +appropriate LMCache or KV-transfer connector for that image/version. + +The preferred form is inline YAML in ``cmd_args.lmcache_config``. CloudAI writes the YAML to the run output directory, +mounts that directory as ``/cloudai_run_results``, and passes the generated file path to the workload script: + +.. code-block:: toml + + [cmd_args] + lmcache_config = ''' + chunk_size: 256 + local_cpu: true + max_local_cpu_size: 6.0 + nixl_buffer_size: 2079377920 + nixl_buffer_device: "cpu" + extra_config: + enable_nixl_storage: false + nixl_backend: "POSIX" + nixl_path: "/tmp/" + nixl_pool_size: 2048 + ''' + +For an example that uses test-in-scenario mode, see +``conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml``. Because the test is fully defined inside the scenario, +``--tests-dir`` is not required when running that example: + +.. code-block:: bash + + uv run cloudai run --system-config \ + --test-scenario conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml + +Alternatively, provide a path that already exists inside the container with ``cmd_args.lmcache_config_path``. Mount the +host file or its parent directory with ``extra_container_mounts``: + +.. code-block:: toml + + extra_container_mounts = ["/host/lmcache:/lmcache"] + + [cmd_args] + lmcache_config_path = "/lmcache/config.yaml" + +For multi-node LMCache storage tests, any path referenced by the LMCache YAML, such as ``nixl_path`` for POSIX-backed +storage, must be visible and writable from every node that is expected to share cached data. A node-local path such as +``/tmp`` is suitable only for single-node smoke tests or configuration propagation checks. + +The legacy ``[cmd_args.lmcache]`` section is still supported. It installs the configured LMCache repository, can generate +a simple LMCache config from structured TOML fields, and can launch ``lmcache_controller`` when ``enable_controller`` and +``controller_cmd`` are configured. Use this path for older/custom Dynamo images that still expect CloudAI to generate the +LMCache config and optionally start the controller. For arbitrary LMCache YAML, prefer ``lmcache_config`` or +``lmcache_config_path``. + Semantic Degradation With AIPerf Accuracy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -215,6 +270,7 @@ Supported Backends The following backends are available via the ``conf/experimental/ai_dynamo/test/`` directory: - **vLLM** (``vllm.toml``) — use with ``test_scenario/vllm_slurm.toml`` +- **vLLM with LMCache config propagation** — use self-contained scenario ``test_scenario/vllm_lmcache.toml`` - **sglang** (``sglang.toml``) — use with ``test_scenario/sglang_slurm.toml`` Both backends use ``aiperf`` as the default benchmark tool and support disaggregated prefill/decode. diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py index 60fba63e4..5ea805147 100644 --- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py +++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py @@ -45,20 +45,6 @@ AIPERF_ACCURACY_ARTIFACTS_DIR = "aiperf_accuracy_artifacts" AIPERF_ACCURACY_RESULTS_CSV = "accuracy_results.csv" LMCACHE_CONFIG_FILE_NAME = "lmcache-config.yaml" -ALLOWED_CONNECTORS = ["kvbm", "lmcache", "nixl", "none"] - - -def validate_connector_value(v: str | list[str] | None) -> str | list[str] | None: - if v is None: - return v - - values = v if isinstance(v, list) else v.replace("[", "").replace("]", "").replace(",", " ").split() - values = [str(c).strip().strip("'\"") for c in values] - - for connector in values: - if connector not in ALLOWED_CONNECTORS: - raise ValueError(f"Invalid connector: {connector}. Available connectors: {ALLOWED_CONNECTORS}") - return v class Args(BaseModel): @@ -143,12 +129,6 @@ class WorkerConfig(BaseModel): default=1, serialization_alias="num-nodes", validation_alias=AliasChoices("num-nodes", "num_nodes") ) nodes: str | None = Field(default=None) - connector: Optional[str | list[str]] = None - - @field_validator("connector", mode="before") - @classmethod - def validate_connector(cls, v: str | list[str] | None) -> str | list[str] | None: - return validate_connector_value(v) args: WorkerBaseArgs = Field(default_factory=WorkerBaseArgs) @@ -172,7 +152,18 @@ class AIDynamoArgs(BaseModel): @field_validator("connector", mode="before") @classmethod def validate_connector(cls, v: str | list[str] | None) -> str | list[str] | None: - return validate_connector_value(v) + if v is None: + return v + allowed_connectors = ["kvbm", "lmcache", "nixl", "none"] + + # Connectors can be either a single string or a space-separated list. + values = v if isinstance(v, str) else " ".join(v) + values = [c.strip() for c in values.split(" ")] + + for connector in values: + if connector not in allowed_connectors: + raise ValueError(f"Invalid connector: {connector}. Available connectors: {allowed_connectors}") + return v workspace_path: str = Field( default="/workspace", diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh b/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh index 46661aa3a..ab51086c9 100644 --- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh +++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh @@ -406,16 +406,6 @@ function array_to_args() for key in "${!arr[@]}"; do shopt -s nocasematch val="${arr[$key]}" - # Handle Python-style list values: ['a', 'b'] -> --key a b - if [[ "$val" =~ ^\[.*\]$ ]]; then - local list_val="${val#[}" - list_val="${list_val%]}" - list_val="${list_val//,/ }" - list_val="${list_val//\'/}" - list_val="${list_val//\"/}" - result+="${key} ${list_val} " - continue - fi # Quote values that contain spaces if [[ "$val" == *" "* ]]; then val="${val//\"/\\\"}" # Escape existing quotes diff --git a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py index 061a07ab2..2c6d8bc0f 100644 --- a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py +++ b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py @@ -230,19 +230,6 @@ def test_gen_script_args_quotes_worker_json_args(strategy: AIDynamoSlurmCommandG assert f"--decode-args-kv-transfer-config '{config}'" in result -def test_gen_script_args_contains_prefill_connector_list(strategy: AIDynamoSlurmCommandGenStrategy) -> None: - td = cast(AIDynamoTestDefinition, strategy.test_run.test) - td.cmd_args.dynamo.prefill_worker.connector = ["lmcache", "nixl"] - - result = strategy._gen_script_args(td) - command = " ".join(result) - - assert "--prefill-connector" in command - assert "lmcache" in command - assert "nixl" in command - assert "--decode-connector" not in command - - def test_gen_script_args_writes_inline_lmcache_config(strategy: AIDynamoSlurmCommandGenStrategy) -> None: td = cast(AIDynamoTestDefinition, strategy.test_run.test) td.cmd_args.lmcache_config = "chunk_size: 256\nlocal_cpu: true\n" From e254226048bcb259d24f86781a9e87e8f3869ee2 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Thu, 28 May 2026 15:24:04 -0700 Subject: [PATCH 04/15] refactor --- .../ai_dynamo/test_scenario/vllm_lmcache.toml | 27 +++--- doc/workloads/ai_dynamo.rst | 36 ++++--- src/cloudai/workloads/ai_dynamo/ai_dynamo.py | 56 ++--------- src/cloudai/workloads/ai_dynamo/ai_dynamo.sh | 94 ++----------------- .../ai_dynamo/slurm_command_gen_strategy.py | 78 ++++++++------- tests/ref_data/ai-dynamo.sbatch | 16 +--- tests/test_acceptance.py | 10 -- .../test_command_gen_strategy_slurm.py | 48 +++++----- .../test_json_gen_strategy_kubernetes.py | 3 - .../ai_dynamo/test_report_gen_strategy.py | 6 -- 10 files changed, 121 insertions(+), 253 deletions(-) diff --git a/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml index fda79b0c0..7f6ca5bd4 100644 --- a/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml +++ b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml @@ -20,7 +20,7 @@ job_status_check = false [[Tests]] id = "test.disagg.single-node.lmcache-config" name = "vLLM LMCache config propagation" -description = "Self-contained AIDynamo scenario showing inline LMCache YAML propagation to vLLM workers." +description = "Self-contained AIDynamo scenario showing LMCache config generation and propagation to vLLM workers." test_template_name = "AIDynamo" time_limit = "00:10:00" extra_container_mounts = ["/run/udev:/run/udev", "/tmp:/tmp"] @@ -28,18 +28,19 @@ extra_container_mounts = ["/run/udev:/run/udev", "/tmp:/tmp"] [Tests.cmd_args] docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.1.1" workloads = "aiperf.sh" - lmcache_config = ''' -chunk_size: 256 -local_cpu: true -max_local_cpu_size: 6.0 -nixl_buffer_size: 2079377920 -nixl_buffer_device: "cpu" -extra_config: - enable_nixl_storage: false - nixl_backend: "POSIX" - nixl_path: "/tmp/" - nixl_pool_size: 2048 -''' + + [Tests.cmd_args.lmcache] + chunk_size = 256 + local_cpu = true + max_local_cpu_size = 6.0 + nixl_buffer_size = 2079377920 + nixl_buffer_device = "cpu" + + [Tests.cmd_args.lmcache.extra_config] + enable_nixl_storage = false + nixl_backend = "POSIX" + nixl_path = "/tmp/" + nixl_pool_size = 2048 [Tests.cmd_args.dynamo] backend = "vllm" diff --git a/doc/workloads/ai_dynamo.rst b/doc/workloads/ai_dynamo.rst index de3645631..c4499d60b 100644 --- a/doc/workloads/ai_dynamo.rst +++ b/doc/workloads/ai_dynamo.rst @@ -117,24 +117,25 @@ AIDynamo can pass an LMCache YAML config to the worker processes by setting ``LM container. This only propagates the LMCache configuration; the vLLM/SGLang runtime still needs to be launched with the appropriate LMCache or KV-transfer connector for that image/version. -The preferred form is inline YAML in ``cmd_args.lmcache_config``. CloudAI writes the YAML to the run output directory, -mounts that directory as ``/cloudai_run_results``, and passes the generated file path to the workload script: +The preferred form is structured TOML under ``[cmd_args.lmcache]``. CloudAI converts that object to YAML in the +run output directory, mounts that directory as ``/cloudai_run_results``, and exports the generated file path as +``LMCACHE_CONFIG_FILE``: .. code-block:: toml [cmd_args] - lmcache_config = ''' - chunk_size: 256 - local_cpu: true - max_local_cpu_size: 6.0 - nixl_buffer_size: 2079377920 - nixl_buffer_device: "cpu" - extra_config: - enable_nixl_storage: false - nixl_backend: "POSIX" - nixl_path: "/tmp/" - nixl_pool_size: 2048 - ''' + [cmd_args.lmcache] + chunk_size = 256 + local_cpu = true + max_local_cpu_size = 6.0 + nixl_buffer_size = 2079377920 + nixl_buffer_device = "cpu" + + [cmd_args.lmcache.extra_config] + enable_nixl_storage = false + nixl_backend = "POSIX" + nixl_path = "/tmp/" + nixl_pool_size = 2048 For an example that uses test-in-scenario mode, see ``conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml``. Because the test is fully defined inside the scenario, @@ -159,11 +160,8 @@ For multi-node LMCache storage tests, any path referenced by the LMCache YAML, s storage, must be visible and writable from every node that is expected to share cached data. A node-local path such as ``/tmp`` is suitable only for single-node smoke tests or configuration propagation checks. -The legacy ``[cmd_args.lmcache]`` section is still supported. It installs the configured LMCache repository, can generate -a simple LMCache config from structured TOML fields, and can launch ``lmcache_controller`` when ``enable_controller`` and -``controller_cmd`` are configured. Use this path for older/custom Dynamo images that still expect CloudAI to generate the -LMCache config and optionally start the controller. For arbitrary LMCache YAML, prefer ``lmcache_config`` or -``lmcache_config_path``. +CloudAI does not start an LMCache controller from this section; use the runtime/image-specific deployment mechanism if +the selected LMCache mode requires one. Semantic Degradation With AIPerf Accuracy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py index 5ea805147..6ecdb3192 100644 --- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py +++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py @@ -232,7 +232,7 @@ def populate_prefill_decode_args(self) -> "AIDynamoArgs": class LMCacheArgs(BaseModel): - """Arguments for LMCache.""" + """Backward-compatible typed shape for common LMCache YAML fields.""" model_config = ConfigDict(extra="allow") @@ -240,38 +240,13 @@ class LMCacheArgs(BaseModel): local_cpu: bool = False nixl_buffer_size: int = 10737418240 nixl_buffer_device: str = "cuda" - extra_config_enable_nixl_storage: bool = True - extra_config_nixl_backend: str = "GDS_MT" - extra_config_nixl_file_pool_size: int = 64 - - # LMCache controller configuration - enable_controller: bool = True - lmcache_instance_id: str = "lmcache_default_instance" - controller_url: str = "localhost:9001" - lmcache_worker_port: int = 8788 - distributed_url: str = "localhost:8789" + extra_config: dict = Field(default_factory=dict) class LMCache(BaseModel): - """LMCache configuration.""" - - model_config = ConfigDict(extra="forbid") + """Raw LMCache YAML configuration.""" - controller_cmd: str = "lmcache_controller --host localhost --port 9000 --monitor-port 9001" - repo: GitRepo = GitRepo( - url="https://github.com/LMCache/LMCache.git", commit="ab8530993992db873869ba882320953582d94309" - ) - - args: LMCacheArgs = Field(default_factory=LMCacheArgs) - extra_args: str | list[str] | None = Field( - default=None, - serialization_alias="extra-args", - validation_alias=AliasChoices("extra-args", "extra_args"), - ) - - @property - def installables(self) -> list[Installable]: - return [self.repo] + model_config = ConfigDict(extra="allow") class GenAIPerf(Workload): @@ -353,20 +328,16 @@ class AIDynamoCmdArgs(CmdArgs): docker_image_url: str storage_cache_dir: Optional[str | list[str]] = Field(default="/tmp", serialization_alias="storage_cache_dir") + dynamo: AIDynamoArgs + + lmcache: LMCache | None = None lmcache_config_path: str | None = Field( default=None, validation_alias=AliasChoices("lmcache-config-path", "lmcache_config_path"), serialization_alias="lmcache-config-path", description="Path to an LMCache YAML config that is already available inside the container.", ) - lmcache_config: str | None = Field( - default=None, - validation_alias=AliasChoices("lmcache-config", "lmcache_config"), - serialization_alias="lmcache-config", - description="Inline LMCache YAML config. CloudAI writes it to the run output and passes it to workers.", - ) - dynamo: AIDynamoArgs - lmcache: LMCache = Field(default_factory=LMCache) + genai_perf: GenAIPerf = Field(default_factory=GenAIPerf) aiperf: AIPerf = Field(default_factory=AIPerf) aiperf_accuracy: AIPerfAccuracy | None = None @@ -382,24 +353,13 @@ def validate_workloads(cls, v: str) -> str: raise ValueError(f"Invalid workload: {workload}. Available workloads: {allowed_workloads}") return ",".join(values) - @model_validator(mode="after") - def validate_lmcache_config(self) -> "AIDynamoCmdArgs": - if self.lmcache_config_path and self.lmcache_config: - raise ValueError("Only one of lmcache_config_path or lmcache_config can be set") - return self - @property def workloads_list(self) -> list[str]: return [w.strip() for w in self.workloads.split(",")] @property def installables(self) -> list[Installable]: - installables: list[Installable] = [] - if "lmcache" in self.model_fields_set: - installables.extend(self.lmcache.installables) - return [ - *installables, *self.genai_perf.installables, *self.aiperf.installables, *(self.aiperf_accuracy.installables if self.aiperf_accuracy else []), diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh b/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh index ab51086c9..53ef4671a 100644 --- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh +++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh @@ -31,8 +31,6 @@ declare -A prefill_config declare -A prefill_args declare -A decode_config declare -A decode_args -declare -A lmcache_args -declare -A lmcache_config declare -A genai_perf_args declare -A genai_perf_config declare -A aiperf_args @@ -163,10 +161,6 @@ _parse_cli_pairs() { decode_args["--${key#--decode-args-}"]="$2" ;; --decode-*) decode_config["${key#--decode-}"]="$2" ;; - --lmcache-args-*) - lmcache_args["${key#--lmcache-args-}"]="$2" ;; - --lmcache-*) - lmcache_config["${key#--lmcache-}"]="$2" ;; --genai_perf-args-*) genai_perf_args["--${key#--genai_perf-args-}"]="$2" ;; --genai_perf-*) @@ -259,28 +253,17 @@ _set_nodelists() } _has_connector() { - # Check if a specific connector is in a comma/space/Python-list connector value. + # Check if a specific connector is in the comma-separated connector list. local needle="$1" - local connectors="${prefill_args["--connector"]:-} ${decode_args["--connector"]:-}" - connectors="${connectors//[/ }" - connectors="${connectors//]/ }" - connectors="${connectors//,/ }" - connectors="${connectors//\'/ }" - connectors="${connectors//\"/ }" - [[ " ${connectors} " == *" ${needle} "* ]] -} - -_has_lmcache_config() { - [[ -n "${lmcache_config["config-path"]:-}" ]] + local prefill_connectors="${prefill_args["--connector"]:-}" + local decode_connectors="${decode_args["--connector"]:-}" + [[ ",$prefill_connectors," == *",$needle,"* ]] || [[ ",$decode_connectors," == *",$needle,"* ]] } _apply_connector_settings() { - if _has_connector "lmcache" || _has_lmcache_config; then + if _has_connector "lmcache" || [[ -n "${LMCACHE_CONFIG_FILE:-}" ]]; then export ENABLE_LMCACHE=1 fi - if _has_lmcache_config; then - export LMCACHE_CONFIG_FILE="${lmcache_config["config-path"]}" - fi if _has_connector "kvbm"; then export ENABLE_KVBM=1 fi @@ -376,8 +359,7 @@ _dump_args() { log "Prefill args:\n$(arg_array_to_string prefill_args)" log "Decode config params:\n$(arg_array_to_string decode_config)" log "Decode args:\n$(arg_array_to_string decode_args)" - log "LMCache config params:\n$(arg_array_to_string lmcache_config)" - log "LMCache args:\n$(arg_array_to_string lmcache_args)" + log "LMCache config file: ${LMCACHE_CONFIG_FILE:-}" log "GenAI config params:\n$(arg_array_to_string genai_perf_config)" log "GenAI-Perf args:\n$(arg_array_to_string genai_perf_args)" log "AIPerf config params:\n$(arg_array_to_string aiperf_config)" @@ -865,21 +847,6 @@ function launch_prefill() done } -function launch_lmcache_controller() -{ - if ! _has_connector "lmcache"; then - return - fi - - if [[ -z "${lmcache_config["controller_cmd"]:-}" ]]; then - log "LMCache connector is set but no LMCache controller command is configured. Skipping controller launch." - return - fi - - log "Launching LMCache controller with cmd: ${lmcache_config["controller_cmd"]}" - ${lmcache_config["controller_cmd"]} > ${RESULTS_DIR}/lmcache_controller.log 2>&1 -} - function wait_for_dynamo_frontend() { local want_prefill=$(_expected_ready_prefill) @@ -976,53 +943,12 @@ function setup_kvbm() function setup_lmcache() { - if ! _has_connector "lmcache"; then - log "Connector list does not include lmcache. Skipping setup_lmcache" + if [[ -z "${LMCACHE_CONFIG_FILE:-}" ]]; then + log "LMCACHE_CONFIG_FILE is not set. Skipping setup_lmcache" return fi - if _has_lmcache_config; then - log "Using explicit LMCache config file: ${lmcache_config["config-path"]}" - setup_cufile - return - fi - - if [[ -z "${lmcache_config["repo"]:-}" ]]; then - log "LMCache connector is set but no generated LMCache config is configured. Skipping setup_lmcache" - return - fi - - _require_cmd uv - local lmcache_path="${lmcache_config["repo"]}" - log "Setting up LMCache; installing LMCache using: uv pip install $lmcache_path" - uv pip install -e "$lmcache_path" - - setup_storage_cache_dir "lmcache" - - export LMCACHE_CONFIG_FILE=$RESULTS_DIR/lmcache-nixl-config.yaml - rm -f $LMCACHE_CONFIG_FILE - - lmcache_args["extra_config_nixl_path"]="$STORAGE_CACHE_DIR" - - for key in "${!lmcache_args[@]}"; do - shopt -s nocasematch - if [[ "$key" == "extra_config"* ]]; then - continue - fi - - val="${lmcache_args[$key]}" - echo "$key: $val" >> $LMCACHE_CONFIG_FILE - done - - echo "extra_config:" >> $LMCACHE_CONFIG_FILE - for key in "${!lmcache_args[@]}"; do - shopt -s nocasematch - if [[ "$key" == "extra_config"* ]]; then - nkey="${key#extra_config_}" - val="${lmcache_args[$key]}" - echo " $nkey: $val" >> $LMCACHE_CONFIG_FILE - fi - done + log "Using LMCache config file: ${LMCACHE_CONFIG_FILE}" setup_cufile } @@ -1157,8 +1083,6 @@ function main() fi if _is_frontend_node; then - launch_lmcache_controller & - sleep 10 launch_workloads & diff --git a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py index e1f33dcec..2c5a3a197 100644 --- a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py +++ b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py @@ -19,6 +19,7 @@ from pathlib import Path from typing import List, cast +import yaml from pydantic import BaseModel, TypeAdapter, ValidationError from cloudai.core import File, GitRepo @@ -30,21 +31,33 @@ class AIDynamoSlurmCommandGenStrategy(SlurmCommandGenStrategy): """Command generation strategy for AI Dynamo on Slurm systems.""" - def _container_mounts(self) -> list[str]: - td = cast(AIDynamoTestDefinition, self.test_run.test) + @property + def td(self) -> AIDynamoTestDefinition: + return cast(AIDynamoTestDefinition, self.test_run.test) + def _container_mounts(self) -> list[str]: result = [f"{self.system.hf_home_path.absolute()}:{self.CONTAINER_MOUNT_HF_HOME}"] - logging.info(f"storage_cache_dir: {td.cmd_args.storage_cache_dir}") - if td.cmd_args.storage_cache_dir: - result.append(f"{td.cmd_args.storage_cache_dir}:{td.cmd_args.storage_cache_dir}") + logging.info(f"storage_cache_dir: {self.td.cmd_args.storage_cache_dir}") + if self.td.cmd_args.storage_cache_dir: + result.append(f"{self.td.cmd_args.storage_cache_dir}:{self.td.cmd_args.storage_cache_dir}") return result + @property + def final_env_vars(self) -> dict[str, str | list[str]]: + env_vars = super().final_env_vars + if lmcache_config_file := self._lmcache_config_file_env_value(): + env_vars["LMCACHE_CONFIG_FILE"] = lmcache_config_file + return env_vars + + @final_env_vars.setter + def final_env_vars(self, value: dict[str, str | list[str]]) -> None: + self._final_env_vars = value + def image_path(self) -> str | None: - tdef: AIDynamoTestDefinition = cast(AIDynamoTestDefinition, self.test_run.test) - if tdef.docker_image and tdef.docker_image.installed_path: - return str(tdef.docker_image.installed_path) + if self.td.docker_image and self.td.docker_image.installed_path: + return str(self.td.docker_image.installed_path) return None def _get_toml_args(self, base_model: BaseModel, prefix: str, exclude: List[str] | None = None) -> List[str]: @@ -87,22 +100,29 @@ def _get_nested_toml_args(self, base_model: BaseModel, prefix: str) -> List[str] return result - def _prepare_lmcache_config(self, td: AIDynamoTestDefinition) -> str | None: - if td.cmd_args.lmcache_config_path: - return td.cmd_args.lmcache_config_path + def _lmcache_config_file_env_value(self) -> str | None: + if self.td.cmd_args.lmcache_config_path: + return self.td.cmd_args.lmcache_config_path + if self.td.cmd_args.lmcache is not None: + return f"{self.CONTAINER_MOUNT_OUTPUT}/{LMCACHE_CONFIG_FILE_NAME}" + return None - if td.cmd_args.lmcache_config is None: - return None + def _prepare_lmcache_config(self) -> None: + if lmcache_config_file := self._lmcache_config_file_env_value(): + self.td.extra_env_vars["LMCACHE_CONFIG_FILE"] = lmcache_config_file + + if self.td.cmd_args.lmcache is None: + return + + config_obj = self.td.cmd_args.lmcache.model_dump(by_alias=True, exclude_none=True, exclude_unset=True) + config_raw = yaml.safe_dump(config_obj, sort_keys=False) self.test_run.output_path.mkdir(parents=True, exist_ok=True) config_path = self.test_run.output_path / LMCACHE_CONFIG_FILE_NAME - config_path.write_text(td.cmd_args.lmcache_config) - return f"{self.CONTAINER_MOUNT_OUTPUT}/{LMCACHE_CONFIG_FILE_NAME}" - - def _should_emit_lmcache_args(self, td: AIDynamoTestDefinition) -> bool: - return "lmcache" in td.cmd_args.model_fields_set + config_path.write_text(config_raw) def _gen_script_args(self, td: AIDynamoTestDefinition) -> List[str]: + self._prepare_lmcache_config() assert td.repo.installed_path args = [ "--user $USER", @@ -118,9 +138,6 @@ def _gen_script_args(self, td: AIDynamoTestDefinition) -> List[str]: if td.cmd_args.storage_cache_dir: args.append(f"--storage-cache-dir {td.cmd_args.storage_cache_dir}") - if lmcache_config_path := self._prepare_lmcache_config(td): - args.append(f"--lmcache-config-path {shlex.quote(lmcache_config_path)}") - args.extend( self._get_toml_args( td.cmd_args.dynamo, @@ -136,9 +153,6 @@ def _gen_script_args(self, td: AIDynamoTestDefinition) -> List[str]: args.extend(self._get_nested_toml_args(td.cmd_args.dynamo.prefill_worker, "--prefill-")) args.extend(self._get_nested_toml_args(td.cmd_args.dynamo.decode_worker, "--decode-")) - if self._should_emit_lmcache_args(td): - args.extend(self._get_nested_toml_args(td.cmd_args.lmcache, "--lmcache-")) - args.extend(self._get_nested_toml_args(td.cmd_args.genai_perf, "--genai_perf-")) args.extend(self._get_nested_toml_args(td.cmd_args.aiperf, "--aiperf-")) if td.cmd_args.aiperf_accuracy is not None: @@ -147,7 +161,6 @@ def _gen_script_args(self, td: AIDynamoTestDefinition) -> List[str]: return args def _gen_srun_command(self) -> str: - td = cast(AIDynamoTestDefinition, self.test_run.test) num_nodes, node_list = self.get_cached_nodes_spec() out_dir = str(self.test_run.output_path.absolute()) @@ -162,10 +175,10 @@ def _gen_srun_command(self) -> str: f"--output={out_dir}/node-%n-stdout.txt", f"--error={out_dir}/node-%n-stderr.txt", "bash", - f"{self.CONTAINER_MOUNT_INSTALL}/{td.script.src.name}", + f"{self.CONTAINER_MOUNT_INSTALL}/{self.td.script.src.name}", ] ) - srun_cmd.extend(self._gen_script_args(td)) + srun_cmd.extend(self._gen_script_args(self.td)) return " \\\n ".join(srun_cmd) + "\n" def _validate_worker_nodes( @@ -204,13 +217,12 @@ def get_cached_nodes_spec(self) -> tuple[int, list[str]]: if cache_key in self._node_spec_cache: return self._node_spec_cache[cache_key] - td = cast(AIDynamoTestDefinition, self.test_run.test) prefill_n, prefill_nodes = 0, "" - if td.cmd_args.dynamo.prefill_worker: - prefill_n = cast(int, td.cmd_args.dynamo.prefill_worker.num_nodes) - prefill_nodes = td.cmd_args.dynamo.prefill_worker.nodes - decode_n = td.cmd_args.dynamo.decode_worker.num_nodes - decode_nodes = td.cmd_args.dynamo.decode_worker.nodes + if self.td.cmd_args.dynamo.prefill_worker: + prefill_n = cast(int, self.td.cmd_args.dynamo.prefill_worker.num_nodes) + prefill_nodes = self.td.cmd_args.dynamo.prefill_worker.nodes + decode_n = self.td.cmd_args.dynamo.decode_worker.num_nodes + decode_nodes = self.td.cmd_args.dynamo.decode_worker.nodes assert isinstance(prefill_n, int), "prefill_worker.num_nodes must be an integer" assert isinstance(decode_n, int), "decode_worker.num_nodes must be an integer" diff --git a/tests/ref_data/ai-dynamo.sbatch b/tests/ref_data/ai-dynamo.sbatch index 492e3c427..865444b81 100644 --- a/tests/ref_data/ai-dynamo.sbatch +++ b/tests/ref_data/ai-dynamo.sbatch @@ -61,20 +61,6 @@ srun \ --decode-args-model "model" \ --decode-args-pipeline-parallel-size "1" \ --decode-args-tensor-parallel-size "1" \ - --lmcache-controller_cmd "lmcache_controller --host localhost --port 9000 --monitor-port 9001" \ - --lmcache-repo "/cloudai_install/LMCache__ab8530993992db873869ba882320953582d94309" \ - --lmcache-args-chunk_size "256" \ - --lmcache-args-local_cpu "False" \ - --lmcache-args-nixl_buffer_size "10737418240" \ - --lmcache-args-nixl_buffer_device "cuda" \ - --lmcache-args-extra_config_enable_nixl_storage "True" \ - --lmcache-args-extra_config_nixl_backend "GDS_MT" \ - --lmcache-args-extra_config_nixl_file_pool_size "64" \ - --lmcache-args-enable_controller "True" \ - --lmcache-args-lmcache_instance_id "lmcache_default_instance" \ - --lmcache-args-controller_url "localhost:9001" \ - --lmcache-args-lmcache_worker_port "8788" \ - --lmcache-args-distributed_url "localhost:8789" \ --genai_perf-name "genai_perf" \ --genai_perf-cmd "genai-perf profile" \ --genai_perf-script "/cloudai_install/genai_perf.sh" \ @@ -89,4 +75,4 @@ srun \ --aiperf-name "aiperf" \ --aiperf-cmd "aiperf profile" \ --aiperf-script "/cloudai_install/aiperf.sh" \ - --aiperf-report-name "aiperf_report.csv" \ No newline at end of file + --aiperf-report-name "aiperf_report.csv" diff --git a/tests/test_acceptance.py b/tests/test_acceptance.py index cf603b767..d45416595 100644 --- a/tests/test_acceptance.py +++ b/tests/test_acceptance.py @@ -35,8 +35,6 @@ AIDynamoCmdArgs, AIDynamoTestDefinition, GenAIPerf, - LMCache, - LMCacheArgs, WorkerBaseArgs, WorkerConfig, ) @@ -528,14 +526,6 @@ def test_req(request, slurm_system: SlurmSystem, partial_tr: partial[TestRun]) - "warmup-request-count": 10, } ), - lmcache=LMCache( - args=LMCacheArgs(), - repo=GitRepo( - url="https://github.com/LMCache/LMCache.git", - commit="ab8530993992db873869ba882320953582d94309", - installed_path=slurm_system.install_path, - ), - ), ), ), ), diff --git a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py index 2c6d8bc0f..ff7bc6d4a 100644 --- a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py +++ b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py @@ -19,6 +19,7 @@ from typing import cast import pytest +import yaml from cloudai._core.test_scenario import TestRun from cloudai.core import GitRepo @@ -33,7 +34,6 @@ AIPerfAccuracy, GenAIPerf, LMCache, - LMCacheArgs, WorkerBaseArgs, WorkerConfig, ) @@ -88,7 +88,6 @@ def cmd_args() -> AIDynamoCmdArgs: "request-count": 10, } ), - lmcache=LMCache(args=LMCacheArgs()), ) @@ -230,32 +229,39 @@ def test_gen_script_args_quotes_worker_json_args(strategy: AIDynamoSlurmCommandG assert f"--decode-args-kv-transfer-config '{config}'" in result -def test_gen_script_args_writes_inline_lmcache_config(strategy: AIDynamoSlurmCommandGenStrategy) -> None: +def test_gen_script_args_uses_container_lmcache_config_path(strategy: AIDynamoSlurmCommandGenStrategy) -> None: td = cast(AIDynamoTestDefinition, strategy.test_run.test) - td.cmd_args.lmcache_config = "chunk_size: 256\nlocal_cpu: true\n" + td.cmd_args.lmcache_config_path = "/opt/shared/lmcache/config.yaml" result = strategy._gen_script_args(td) - config_path = strategy.test_run.output_path / LMCACHE_CONFIG_FILE_NAME - assert f"--lmcache-config-path {strategy.CONTAINER_MOUNT_OUTPUT}/{LMCACHE_CONFIG_FILE_NAME}" in result - assert config_path.read_text() == td.cmd_args.lmcache_config + assert td.extra_env_vars["LMCACHE_CONFIG_FILE"] == "/opt/shared/lmcache/config.yaml" + assert not (strategy.test_run.output_path / LMCACHE_CONFIG_FILE_NAME).exists() + assert not any(arg.startswith("--lmcache") for arg in result) -def test_gen_script_args_uses_container_lmcache_config_path(strategy: AIDynamoSlurmCommandGenStrategy) -> None: +def test_gen_script_args_writes_lmcache_object_as_yaml(strategy: AIDynamoSlurmCommandGenStrategy) -> None: td = cast(AIDynamoTestDefinition, strategy.test_run.test) - td.cmd_args.lmcache_config_path = "/opt/shared/lmcache/config.yaml" + td.cmd_args.lmcache = LMCache.model_validate( + { + "chunk_size": 512, + "local_cpu": True, + "extra_config": { + "enable_nixl_storage": False, + "nixl_backend": "POSIX", + "nixl_path": "/tmp/", + }, + } + ) result = strategy._gen_script_args(td) - assert "--lmcache-config-path /opt/shared/lmcache/config.yaml" in result - assert not (strategy.test_run.output_path / LMCACHE_CONFIG_FILE_NAME).exists() - - -def test_lmcache_config_path_and_inline_config_are_mutually_exclusive() -> None: - with pytest.raises(ValueError, match="Only one of lmcache_config_path or lmcache_config"): - AIDynamoCmdArgs( - docker_image_url="url", - dynamo=AIDynamoArgs(), - lmcache_config_path="/opt/shared/lmcache/config.yaml", - lmcache_config="chunk_size: 256\n", - ) + config_path = strategy.test_run.output_path / LMCACHE_CONFIG_FILE_NAME + config = yaml.safe_load(config_path.read_text()) + assert td.extra_env_vars["LMCACHE_CONFIG_FILE"] == f"{strategy.CONTAINER_MOUNT_OUTPUT}/{LMCACHE_CONFIG_FILE_NAME}" + assert config["chunk_size"] == 512 + assert config["local_cpu"] is True + assert config["extra_config"]["enable_nixl_storage"] is False + assert config["extra_config"]["nixl_backend"] == "POSIX" + assert config["extra_config"]["nixl_path"] == "/tmp/" + assert not any(arg.startswith("--lmcache") for arg in result) diff --git a/tests/workloads/ai_dynamo/test_json_gen_strategy_kubernetes.py b/tests/workloads/ai_dynamo/test_json_gen_strategy_kubernetes.py index 569978f49..b3120f5b9 100644 --- a/tests/workloads/ai_dynamo/test_json_gen_strategy_kubernetes.py +++ b/tests/workloads/ai_dynamo/test_json_gen_strategy_kubernetes.py @@ -29,8 +29,6 @@ AIDynamoKubernetesJsonGenStrategy, AIDynamoTestDefinition, GenAIPerf, - LMCache, - LMCacheArgs, WorkerBaseArgs, WorkerConfig, ) @@ -54,7 +52,6 @@ def dynamo(request: Any) -> AIDynamoTestDefinition: ) ), genai_perf=GenAIPerf(), - lmcache=LMCache(args=LMCacheArgs()), ), ) if request.param == "disagg": diff --git a/tests/workloads/ai_dynamo/test_report_gen_strategy.py b/tests/workloads/ai_dynamo/test_report_gen_strategy.py index 47e214421..f81e9a2dd 100644 --- a/tests/workloads/ai_dynamo/test_report_gen_strategy.py +++ b/tests/workloads/ai_dynamo/test_report_gen_strategy.py @@ -28,8 +28,6 @@ AIPerf, AIPerfAccuracy, GenAIPerf, - LMCache, - LMCacheArgs, WorkerBaseArgs, WorkerConfig, ) @@ -89,7 +87,6 @@ def ai_dynamo_tr(tmp_path: Path) -> TestRun: ), ), genai_perf=GenAIPerf(), - lmcache=LMCache(args=LMCacheArgs()), ), ) tr = TestRun(name="ai_dynamo", test=test, num_nodes=1, nodes=[], output_path=tmp_path) @@ -121,7 +118,6 @@ def ai_dynamo_aiperf_tr(tmp_path: Path) -> TestRun: ), ), aiperf=AIPerf(), - lmcache=LMCache(args=LMCacheArgs()), ), ) tr = TestRun(name="ai_dynamo_aiperf", test=test, num_nodes=1, nodes=[], output_path=tmp_path) @@ -148,7 +144,6 @@ def ai_dynamo_aiperf_with_split_accuracy_tr(tmp_path: Path) -> TestRun: ), aiperf=AIPerf(), aiperf_accuracy=AIPerfAccuracy.model_validate({"cli": get_aiperf_accuracy_cli()}), - lmcache=LMCache(args=LMCacheArgs()), ), ) tr = TestRun(name="ai_dynamo_aiperf_with_split_accuracy", test=test, num_nodes=1, nodes=[], output_path=tmp_path) @@ -176,7 +171,6 @@ def ai_dynamo_genai_perf_with_split_accuracy_tr(tmp_path: Path) -> TestRun: ), genai_perf=GenAIPerf(), aiperf_accuracy=AIPerfAccuracy.model_validate({"cli": get_aiperf_accuracy_cli()}), - lmcache=LMCache(args=LMCacheArgs()), ), ) tr = TestRun( From 389d63da5d7740350cd3668f92d53461f4fded14 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Thu, 28 May 2026 15:38:11 -0700 Subject: [PATCH 05/15] refactor keeps going --- doc/workloads/ai_dynamo.rst | 8 ++-- src/cloudai/workloads/ai_dynamo/__init__.py | 4 -- src/cloudai/workloads/ai_dynamo/ai_dynamo.py | 28 +------------- .../ai_dynamo/slurm_command_gen_strategy.py | 24 +++--------- .../test_command_gen_strategy_slurm.py | 37 +++++++------------ 5 files changed, 23 insertions(+), 78 deletions(-) diff --git a/doc/workloads/ai_dynamo.rst b/doc/workloads/ai_dynamo.rst index c4499d60b..585a9ccf4 100644 --- a/doc/workloads/ai_dynamo.rst +++ b/doc/workloads/ai_dynamo.rst @@ -146,15 +146,13 @@ For an example that uses test-in-scenario mode, see uv run cloudai run --system-config \ --test-scenario conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml -Alternatively, provide a path that already exists inside the container with ``cmd_args.lmcache_config_path``. Mount the -host file or its parent directory with ``extra_container_mounts``: +Alternatively, mount your own LMCache YAML file with ``extra_container_mounts`` and set ``LMCACHE_CONFIG_FILE`` through +``extra_env_vars``: .. code-block:: toml extra_container_mounts = ["/host/lmcache:/lmcache"] - - [cmd_args] - lmcache_config_path = "/lmcache/config.yaml" + extra_env_vars = { LMCACHE_CONFIG_FILE = "/lmcache/config.yaml" } For multi-node LMCache storage tests, any path referenced by the LMCache YAML, such as ``nixl_path`` for POSIX-backed storage, must be visible and writable from every node that is expected to share cached data. A node-local path such as diff --git a/src/cloudai/workloads/ai_dynamo/__init__.py b/src/cloudai/workloads/ai_dynamo/__init__.py index 6ce965fbb..e5363abf4 100644 --- a/src/cloudai/workloads/ai_dynamo/__init__.py +++ b/src/cloudai/workloads/ai_dynamo/__init__.py @@ -22,8 +22,6 @@ AIPerf, AIPerfAccuracy, GenAIPerf, - LMCache, - LMCacheArgs, WorkerBaseArgs, WorkerConfig, ) @@ -42,8 +40,6 @@ "AIPerf", "AIPerfAccuracy", "GenAIPerf", - "LMCache", - "LMCacheArgs", "WorkerBaseArgs", "WorkerConfig", ] diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py index 6ecdb3192..04beef3ef 100644 --- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py +++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py @@ -231,24 +231,6 @@ def populate_prefill_decode_args(self) -> "AIDynamoArgs": return self -class LMCacheArgs(BaseModel): - """Backward-compatible typed shape for common LMCache YAML fields.""" - - model_config = ConfigDict(extra="allow") - - chunk_size: int = 256 - local_cpu: bool = False - nixl_buffer_size: int = 10737418240 - nixl_buffer_device: str = "cuda" - extra_config: dict = Field(default_factory=dict) - - -class LMCache(BaseModel): - """Raw LMCache YAML configuration.""" - - model_config = ConfigDict(extra="allow") - - class GenAIPerf(Workload): """Workload configuration for GenAI performance profiling.""" @@ -329,15 +311,7 @@ class AIDynamoCmdArgs(CmdArgs): docker_image_url: str storage_cache_dir: Optional[str | list[str]] = Field(default="/tmp", serialization_alias="storage_cache_dir") dynamo: AIDynamoArgs - - lmcache: LMCache | None = None - lmcache_config_path: str | None = Field( - default=None, - validation_alias=AliasChoices("lmcache-config-path", "lmcache_config_path"), - serialization_alias="lmcache-config-path", - description="Path to an LMCache YAML config that is already available inside the container.", - ) - + lmcache: dict | None = None genai_perf: GenAIPerf = Field(default_factory=GenAIPerf) aiperf: AIPerf = Field(default_factory=AIPerf) aiperf_accuracy: AIPerfAccuracy | None = None diff --git a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py index 2c5a3a197..c481dc660 100644 --- a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py +++ b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py @@ -47,8 +47,8 @@ def _container_mounts(self) -> list[str]: @property def final_env_vars(self) -> dict[str, str | list[str]]: env_vars = super().final_env_vars - if lmcache_config_file := self._lmcache_config_file_env_value(): - env_vars["LMCACHE_CONFIG_FILE"] = lmcache_config_file + if self.td.cmd_args.lmcache is not None: + env_vars["LMCACHE_CONFIG_FILE"] = f"{self.CONTAINER_MOUNT_OUTPUT}/{LMCACHE_CONFIG_FILE_NAME}" return env_vars @final_env_vars.setter @@ -100,30 +100,18 @@ def _get_nested_toml_args(self, base_model: BaseModel, prefix: str) -> List[str] return result - def _lmcache_config_file_env_value(self) -> str | None: - if self.td.cmd_args.lmcache_config_path: - return self.td.cmd_args.lmcache_config_path - if self.td.cmd_args.lmcache is not None: - return f"{self.CONTAINER_MOUNT_OUTPUT}/{LMCACHE_CONFIG_FILE_NAME}" - return None - - def _prepare_lmcache_config(self) -> None: - if lmcache_config_file := self._lmcache_config_file_env_value(): - self.td.extra_env_vars["LMCACHE_CONFIG_FILE"] = lmcache_config_file - + def _prepare_lmcache_config(self): if self.td.cmd_args.lmcache is None: return - config_obj = self.td.cmd_args.lmcache.model_dump(by_alias=True, exclude_none=True, exclude_unset=True) - config_raw = yaml.safe_dump(config_obj, sort_keys=False) - self.test_run.output_path.mkdir(parents=True, exist_ok=True) config_path = self.test_run.output_path / LMCACHE_CONFIG_FILE_NAME - config_path.write_text(config_raw) + config_path.write_text(yaml.safe_dump(self.td.cmd_args.lmcache, sort_keys=False)) def _gen_script_args(self, td: AIDynamoTestDefinition) -> List[str]: self._prepare_lmcache_config() - assert td.repo.installed_path + if not td.repo.installed_path: + raise ValueError("Dynamo repo is not installed") args = [ "--user $USER", f"--install-dir {self.CONTAINER_MOUNT_INSTALL}", diff --git a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py index ff7bc6d4a..2f1c3790e 100644 --- a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py +++ b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py @@ -33,7 +33,6 @@ AIPerf, AIPerfAccuracy, GenAIPerf, - LMCache, WorkerBaseArgs, WorkerConfig, ) @@ -229,36 +228,26 @@ def test_gen_script_args_quotes_worker_json_args(strategy: AIDynamoSlurmCommandG assert f"--decode-args-kv-transfer-config '{config}'" in result -def test_gen_script_args_uses_container_lmcache_config_path(strategy: AIDynamoSlurmCommandGenStrategy) -> None: - td = cast(AIDynamoTestDefinition, strategy.test_run.test) - td.cmd_args.lmcache_config_path = "/opt/shared/lmcache/config.yaml" - - result = strategy._gen_script_args(td) - - assert td.extra_env_vars["LMCACHE_CONFIG_FILE"] == "/opt/shared/lmcache/config.yaml" - assert not (strategy.test_run.output_path / LMCACHE_CONFIG_FILE_NAME).exists() - assert not any(arg.startswith("--lmcache") for arg in result) - - def test_gen_script_args_writes_lmcache_object_as_yaml(strategy: AIDynamoSlurmCommandGenStrategy) -> None: td = cast(AIDynamoTestDefinition, strategy.test_run.test) - td.cmd_args.lmcache = LMCache.model_validate( - { - "chunk_size": 512, - "local_cpu": True, - "extra_config": { - "enable_nixl_storage": False, - "nixl_backend": "POSIX", - "nixl_path": "/tmp/", - }, - } - ) + td.cmd_args.lmcache = { + "chunk_size": 512, + "local_cpu": True, + "extra_config": { + "enable_nixl_storage": False, + "nixl_backend": "POSIX", + "nixl_path": "/tmp/", + }, + } result = strategy._gen_script_args(td) config_path = strategy.test_run.output_path / LMCACHE_CONFIG_FILE_NAME config = yaml.safe_load(config_path.read_text()) - assert td.extra_env_vars["LMCACHE_CONFIG_FILE"] == f"{strategy.CONTAINER_MOUNT_OUTPUT}/{LMCACHE_CONFIG_FILE_NAME}" + assert ( + strategy.final_env_vars["LMCACHE_CONFIG_FILE"] + == f"{strategy.CONTAINER_MOUNT_OUTPUT}/{LMCACHE_CONFIG_FILE_NAME}" + ) assert config["chunk_size"] == 512 assert config["local_cpu"] is True assert config["extra_config"]["enable_nixl_storage"] is False From b33cdc5cf5ea2eb16d38ac80b4046d6c84d60916 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Thu, 28 May 2026 17:42:17 -0700 Subject: [PATCH 06/15] trying to fully enable lmcache and lmcache controller --- conf/experimental/ai_dynamo/test/sglang.toml | 3 + conf/experimental/ai_dynamo/test/vllm.toml | 3 + .../ai_dynamo/test_scenario/vllm_lmcache.toml | 18 ++++- doc/workloads/ai_dynamo.rst | 19 ++++- src/cloudai/workloads/ai_dynamo/__init__.py | 2 + src/cloudai/workloads/ai_dynamo/ai_dynamo.py | 9 +++ src/cloudai/workloads/ai_dynamo/ai_dynamo.sh | 80 +++++++++++++++++++ .../ai_dynamo/slurm_command_gen_strategy.py | 2 + .../test_command_gen_strategy_slurm.py | 17 +++- 9 files changed, 144 insertions(+), 9 deletions(-) diff --git a/conf/experimental/ai_dynamo/test/sglang.toml b/conf/experimental/ai_dynamo/test/sglang.toml index 1d8b80a8f..67fc999f0 100644 --- a/conf/experimental/ai_dynamo/test/sglang.toml +++ b/conf/experimental/ai_dynamo/test/sglang.toml @@ -59,6 +59,9 @@ workloads = "aiperf.sh" host = "0.0.0.0" disaggregation-transfer-backend = "nixl" + [cmd_args.lmcache_controller] + cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-port 9001" + [cmd_args.genai_perf] cmd = "genai-perf profile" extra-args = "--streaming --verbose -- -v --async" diff --git a/conf/experimental/ai_dynamo/test/vllm.toml b/conf/experimental/ai_dynamo/test/vllm.toml index 5f609e142..8a5f3b939 100644 --- a/conf/experimental/ai_dynamo/test/vllm.toml +++ b/conf/experimental/ai_dynamo/test/vllm.toml @@ -51,6 +51,9 @@ workloads = "aiperf.sh" pipeline-parallel-size = 1 data-parallel-size = 1 + [cmd_args.lmcache_controller] + cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-port 9001" + [cmd_args.genai_perf] cmd = "genai-perf profile" extra-args = "--streaming --verbose -- -v --async" diff --git a/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml index 7f6ca5bd4..76229dbcf 100644 --- a/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml +++ b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml @@ -18,9 +18,9 @@ name = "dynamo-vllm-lmcache" job_status_check = false [[Tests]] -id = "test.disagg.single-node.lmcache-config" -name = "vLLM LMCache config propagation" -description = "Self-contained AIDynamo scenario showing LMCache config generation and propagation to vLLM workers." +id = "test.disagg.lmcache-controller" +name = "vLLM disaggregated LMCache with controller" +description = "Self-contained AIDynamo scenario wiring vLLM disaggregated inference, LMCache config propagation, and LMCache controller launch." test_template_name = "AIDynamo" time_limit = "00:10:00" extra_container_mounts = ["/run/udev:/run/udev", "/tmp:/tmp"] @@ -30,8 +30,13 @@ extra_container_mounts = ["/run/udev:/run/udev", "/tmp:/tmp"] workloads = "aiperf.sh" [Tests.cmd_args.lmcache] + enable_controller = true chunk_size = 256 local_cpu = true + lmcache_instance_id = "lmcache_default_instance" + controller_url = "{frontend_node}:9001" + distributed_url = "{frontend_node}:8789" + lmcache_worker_port = 8788 max_local_cpu_size = 6.0 nixl_buffer_size = 2079377920 nixl_buffer_device = "cpu" @@ -39,9 +44,12 @@ extra_container_mounts = ["/run/udev:/run/udev", "/tmp:/tmp"] [Tests.cmd_args.lmcache.extra_config] enable_nixl_storage = false nixl_backend = "POSIX" - nixl_path = "/tmp/" + nixl_path = "{storage_cache_dir}" nixl_pool_size = 2048 + [Tests.cmd_args.lmcache_controller] + cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-port 9001" + [Tests.cmd_args.dynamo] backend = "vllm" model = "Qwen/Qwen3-0.6B" @@ -54,6 +62,7 @@ extra_container_mounts = ["/run/udev:/run/udev", "/tmp:/tmp"] [Tests.cmd_args.dynamo.prefill_worker.args] gpu-memory-utilization = 0.8 + kv-transfer-config = '{"kv_connector":"PdConnector","kv_role":"kv_both","kv_connector_extra_config":{"connectors":[{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"},{"kv_connector":"NixlConnector","kv_role":"kv_both"}]}}' tensor-parallel-size = 4 pipeline-parallel-size = 1 data-parallel-size = 1 @@ -66,6 +75,7 @@ extra_container_mounts = ["/run/udev:/run/udev", "/tmp:/tmp"] [Tests.cmd_args.dynamo.decode_worker.args] gpu-memory-utilization = 0.8 + kv-transfer-config = '{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}' tensor-parallel-size = 4 pipeline-parallel-size = 1 data-parallel-size = 1 diff --git a/doc/workloads/ai_dynamo.rst b/doc/workloads/ai_dynamo.rst index 585a9ccf4..5a541af98 100644 --- a/doc/workloads/ai_dynamo.rst +++ b/doc/workloads/ai_dynamo.rst @@ -127,6 +127,8 @@ run output directory, mounts that directory as ``/cloudai_run_results``, and exp [cmd_args.lmcache] chunk_size = 256 local_cpu = true + controller_url = "{frontend_node}:9001" + distributed_url = "{frontend_node}:8789" max_local_cpu_size = 6.0 nixl_buffer_size = 2079377920 nixl_buffer_device = "cpu" @@ -134,7 +136,7 @@ run output directory, mounts that directory as ``/cloudai_run_results``, and exp [cmd_args.lmcache.extra_config] enable_nixl_storage = false nixl_backend = "POSIX" - nixl_path = "/tmp/" + nixl_path = "{storage_cache_dir}" nixl_pool_size = 2048 For an example that uses test-in-scenario mode, see @@ -158,8 +160,19 @@ For multi-node LMCache storage tests, any path referenced by the LMCache YAML, s storage, must be visible and writable from every node that is expected to share cached data. A node-local path such as ``/tmp`` is suitable only for single-node smoke tests or configuration propagation checks. -CloudAI does not start an LMCache controller from this section; use the runtime/image-specific deployment mechanism if -the selected LMCache mode requires one. +LMCache YAML values can use runtime placeholders. CloudAI renders them inside the Slurm job before launching workers: +``{frontend_node}``, ``{frontend_ip}``, ``{results_dir}``, and ``{storage_cache_dir}``. Unknown placeholders fail the +run before worker processes start. + +If the selected LMCache mode needs a controller, CloudAI can start one on the frontend node: + +.. code-block:: toml + + [cmd_args.lmcache_controller] + cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-port 9001" + +This only launches the process. For disaggregated or multi-node runs, the LMCache YAML still needs a ``controller_url`` +that resolves to the frontend node from every worker, such as ``"{frontend_node}:9001"``. Semantic Degradation With AIPerf Accuracy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/cloudai/workloads/ai_dynamo/__init__.py b/src/cloudai/workloads/ai_dynamo/__init__.py index e5363abf4..61ab6f120 100644 --- a/src/cloudai/workloads/ai_dynamo/__init__.py +++ b/src/cloudai/workloads/ai_dynamo/__init__.py @@ -22,6 +22,7 @@ AIPerf, AIPerfAccuracy, GenAIPerf, + LMCacheController, WorkerBaseArgs, WorkerConfig, ) @@ -40,6 +41,7 @@ "AIPerf", "AIPerfAccuracy", "GenAIPerf", + "LMCacheController", "WorkerBaseArgs", "WorkerConfig", ] diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py index 04beef3ef..2771f5de6 100644 --- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py +++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py @@ -303,6 +303,14 @@ class Constraints(BaseModel): tp_times_pp_le_gpus_per_node: bool = True +class LMCacheController(BaseModel): + """Optional LMCache controller process to launch on the frontend node.""" + + model_config = ConfigDict(extra="forbid") + + cmd: str + + class AIDynamoCmdArgs(CmdArgs): """Arguments for AI Dynamo.""" @@ -312,6 +320,7 @@ class AIDynamoCmdArgs(CmdArgs): storage_cache_dir: Optional[str | list[str]] = Field(default="/tmp", serialization_alias="storage_cache_dir") dynamo: AIDynamoArgs lmcache: dict | None = None + lmcache_controller: LMCacheController | None = None genai_perf: GenAIPerf = Field(default_factory=GenAIPerf) aiperf: AIPerf = Field(default_factory=AIPerf) aiperf_accuracy: AIPerfAccuracy | None = None diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh b/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh index 53ef4671a..74d84c855 100644 --- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh +++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh @@ -38,6 +38,8 @@ declare -A aiperf_config declare -A aiperf_accuracy_args declare -A aiperf_accuracy_config +lmcache_controller_cmd="" + declare -A dynamo_args dynamo_args["backend"]="vllm" dynamo_args["node-setup-cmd"]="" @@ -161,6 +163,8 @@ _parse_cli_pairs() { decode_args["--${key#--decode-args-}"]="$2" ;; --decode-*) decode_config["${key#--decode-}"]="$2" ;; + --lmcache-controller-cmd) + lmcache_controller_cmd="$2" ;; --genai_perf-args-*) genai_perf_args["--${key#--genai_perf-args-}"]="$2" ;; --genai_perf-*) @@ -360,6 +364,7 @@ _dump_args() { log "Decode config params:\n$(arg_array_to_string decode_config)" log "Decode args:\n$(arg_array_to_string decode_args)" log "LMCache config file: ${LMCACHE_CONFIG_FILE:-}" + log "LMCache controller cmd: ${lmcache_controller_cmd}" log "GenAI config params:\n$(arg_array_to_string genai_perf_config)" log "GenAI-Perf args:\n$(arg_array_to_string genai_perf_args)" log "AIPerf config params:\n$(arg_array_to_string aiperf_config)" @@ -928,6 +933,11 @@ function setup_storage_cache_dir() chmod 755 "${STORAGE_CACHE_DIR}" } +function lmcache_storage_cache_dir() +{ + echo "${STORAGE_CACHE_DIR_BASE}/${TEST_USER}/${dynamo_args["frontend-node"]}/lmcache/cache" +} + function setup_kvbm() { if ! _has_connector "kvbm"; then @@ -941,6 +951,63 @@ function setup_kvbm() setup_cufile } +function render_lmcache_config() +{ + if [[ -z "${LMCACHE_CONFIG_FILE:-}" ]]; then + return + fi + + if [[ ! -f "${LMCACHE_CONFIG_FILE}" ]]; then + log "ERROR: LMCACHE_CONFIG_FILE does not exist: ${LMCACHE_CONFIG_FILE}" + exit 1 + fi + + _require_cmd python3 + + local frontend_node="${dynamo_args["frontend-node"]}" + local frontend_ip="$(_resolve_host_ip "$frontend_node")" + local storage_cache_dir="$(lmcache_storage_cache_dir)" + mkdir -p "$storage_cache_dir" + chmod 755 "$storage_cache_dir" + + local rendered_config="${RESULTS_DIR}/lmcache-config-${SLURM_NODEID:-0}.yaml" + if ! FRONTEND_NODE="$frontend_node" \ + FRONTEND_IP="$frontend_ip" \ + RESULTS_DIR="$RESULTS_DIR" \ + STORAGE_CACHE_DIR="$storage_cache_dir" \ + python3 - "$LMCACHE_CONFIG_FILE" "$rendered_config" <<'PY' +import os +import re +import sys +from pathlib import Path + +src, dst = sys.argv[1], sys.argv[2] +values = { + "frontend_node": os.environ["FRONTEND_NODE"], + "frontend_ip": os.environ["FRONTEND_IP"], + "results_dir": os.environ["RESULTS_DIR"], + "storage_cache_dir": os.environ["STORAGE_CACHE_DIR"], +} + +content = Path(src).read_text() +unknown = sorted(set(re.findall(r"\{([A-Za-z_][A-Za-z0-9_]*)\}", content)) - values.keys()) +if unknown: + raise SystemExit(f"Unknown LMCache config placeholders: {', '.join(unknown)}") + +for key, value in values.items(): + content = content.replace("{" + key + "}", value) + +Path(dst).write_text(content) +PY + then + log "ERROR: Failed to render LMCache config template: ${LMCACHE_CONFIG_FILE}" + exit 1 + fi + + export LMCACHE_CONFIG_FILE="$rendered_config" + log "Rendered LMCache config file: ${LMCACHE_CONFIG_FILE}" +} + function setup_lmcache() { if [[ -z "${LMCACHE_CONFIG_FILE:-}" ]]; then @@ -952,6 +1019,16 @@ function setup_lmcache() setup_cufile } +function launch_lmcache_controller() +{ + if [[ -z "${lmcache_controller_cmd}" ]]; then + return + fi + + log "Launching LMCache controller with cmd: ${lmcache_controller_cmd}" + ${lmcache_controller_cmd} > "${RESULTS_DIR}/lmcache_controller.log" 2>&1 +} + function log_gpu_utilization() { # Check if nvidia-smi is available @@ -1054,6 +1131,8 @@ function main() cd "$RESULTS_DIR" || { log "ERROR: Failed to cd to $RESULTS_DIR"; exit 1; } + render_lmcache_config + log_gpu_utilization & if _is_frontend_node; then @@ -1061,6 +1140,7 @@ function main() log_node_role "$(_current_node_name)" "frontend" setup_lmcache setup_kvbm + launch_lmcache_controller & launch_etcd & launch_nats & wait_for_etcd diff --git a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py index c481dc660..136ab6afc 100644 --- a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py +++ b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py @@ -125,6 +125,8 @@ def _gen_script_args(self, td: AIDynamoTestDefinition) -> List[str]: if td.cmd_args.storage_cache_dir: args.append(f"--storage-cache-dir {td.cmd_args.storage_cache_dir}") + if td.cmd_args.lmcache_controller: + args.append(f"--lmcache-controller-cmd {shlex.quote(td.cmd_args.lmcache_controller.cmd)}") args.extend( self._get_toml_args( diff --git a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py index 2f1c3790e..192c16146 100644 --- a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py +++ b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py @@ -33,6 +33,7 @@ AIPerf, AIPerfAccuracy, GenAIPerf, + LMCacheController, WorkerBaseArgs, WorkerConfig, ) @@ -233,10 +234,11 @@ def test_gen_script_args_writes_lmcache_object_as_yaml(strategy: AIDynamoSlurmCo td.cmd_args.lmcache = { "chunk_size": 512, "local_cpu": True, + "controller_url": "{frontend_node}:9001", "extra_config": { "enable_nixl_storage": False, "nixl_backend": "POSIX", - "nixl_path": "/tmp/", + "nixl_path": "{storage_cache_dir}", }, } @@ -250,7 +252,18 @@ def test_gen_script_args_writes_lmcache_object_as_yaml(strategy: AIDynamoSlurmCo ) assert config["chunk_size"] == 512 assert config["local_cpu"] is True + assert config["controller_url"] == "{frontend_node}:9001" assert config["extra_config"]["enable_nixl_storage"] is False assert config["extra_config"]["nixl_backend"] == "POSIX" - assert config["extra_config"]["nixl_path"] == "/tmp/" + assert config["extra_config"]["nixl_path"] == "{storage_cache_dir}" assert not any(arg.startswith("--lmcache") for arg in result) + + +def test_gen_script_args_passes_lmcache_controller_cmd(strategy: AIDynamoSlurmCommandGenStrategy) -> None: + td = cast(AIDynamoTestDefinition, strategy.test_run.test) + cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-port 9001" + td.cmd_args.lmcache_controller = LMCacheController(cmd=cmd) + + result = strategy._gen_script_args(td) + + assert f"--lmcache-controller-cmd {shlex.quote(cmd)}" in result From 9a1be0ab47393ac481298af42bdc6382fa8adc8c Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Thu, 28 May 2026 17:44:02 -0700 Subject: [PATCH 07/15] fix slurm job name --- conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml index 76229dbcf..5dc8b8fa5 100644 --- a/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml +++ b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml @@ -19,7 +19,7 @@ job_status_check = false [[Tests]] id = "test.disagg.lmcache-controller" -name = "vLLM disaggregated LMCache with controller" +name = "vllm-disagg-lmcache-controller" description = "Self-contained AIDynamo scenario wiring vLLM disaggregated inference, LMCache config propagation, and LMCache controller launch." test_template_name = "AIDynamo" time_limit = "00:10:00" From 5d5115ba2ed06c7fc6b07df7a7f148cb79a04e60 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Thu, 28 May 2026 18:04:43 -0700 Subject: [PATCH 08/15] try another lmcache config --- .../ai_dynamo/test_scenario/vllm_lmcache.toml | 10 +++--- doc/workloads/ai_dynamo.rst | 35 ++++++++++--------- src/cloudai/workloads/ai_dynamo/__init__.py | 2 ++ src/cloudai/workloads/ai_dynamo/ai_dynamo.py | 1 + src/cloudai/workloads/ai_dynamo/ai_dynamo.sh | 6 ++-- .../ai_dynamo/slurm_command_gen_strategy.py | 7 ++-- .../test_command_gen_strategy_slurm.py | 12 +++++-- 7 files changed, 44 insertions(+), 29 deletions(-) diff --git a/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml index 5dc8b8fa5..2db5adbf4 100644 --- a/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml +++ b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml @@ -34,9 +34,9 @@ extra_container_mounts = ["/run/udev:/run/udev", "/tmp:/tmp"] chunk_size = 256 local_cpu = true lmcache_instance_id = "lmcache_default_instance" - controller_url = "{frontend_node}:9001" - distributed_url = "{frontend_node}:8789" - lmcache_worker_port = 8788 + controller_pull_url = "{frontend_node}:8300" + controller_reply_url = "{frontend_node}:8400" + lmcache_worker_ports = [8788, 8789, 8790, 8791] max_local_cpu_size = 6.0 nixl_buffer_size = 2079377920 nixl_buffer_device = "cpu" @@ -48,7 +48,7 @@ extra_container_mounts = ["/run/udev:/run/udev", "/tmp:/tmp"] nixl_pool_size = 2048 [Tests.cmd_args.lmcache_controller] - cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-port 9001" + cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-ports {\"pull\":8300,\"reply\":8400}" [Tests.cmd_args.dynamo] backend = "vllm" @@ -62,7 +62,7 @@ extra_container_mounts = ["/run/udev:/run/udev", "/tmp:/tmp"] [Tests.cmd_args.dynamo.prefill_worker.args] gpu-memory-utilization = 0.8 - kv-transfer-config = '{"kv_connector":"PdConnector","kv_role":"kv_both","kv_connector_extra_config":{"connectors":[{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"},{"kv_connector":"NixlConnector","kv_role":"kv_both"}]}}' + kv-transfer-config = '{"kv_connector":"MultiConnector","kv_role":"kv_both","kv_connector_extra_config":{"connectors":[{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"},{"kv_connector":"NixlConnector","kv_role":"kv_both"}]}}' tensor-parallel-size = 4 pipeline-parallel-size = 1 data-parallel-size = 1 diff --git a/doc/workloads/ai_dynamo.rst b/doc/workloads/ai_dynamo.rst index 5a541af98..77e13e5ed 100644 --- a/doc/workloads/ai_dynamo.rst +++ b/doc/workloads/ai_dynamo.rst @@ -125,19 +125,20 @@ run output directory, mounts that directory as ``/cloudai_run_results``, and exp [cmd_args] [cmd_args.lmcache] - chunk_size = 256 - local_cpu = true - controller_url = "{frontend_node}:9001" - distributed_url = "{frontend_node}:8789" - max_local_cpu_size = 6.0 - nixl_buffer_size = 2079377920 - nixl_buffer_device = "cpu" - - [cmd_args.lmcache.extra_config] - enable_nixl_storage = false - nixl_backend = "POSIX" - nixl_path = "{storage_cache_dir}" - nixl_pool_size = 2048 + chunk_size = 256 + local_cpu = true + controller_pull_url = "{frontend_node}:8300" + controller_reply_url = "{frontend_node}:8400" + lmcache_worker_ports = [8788, 8789, 8790, 8791] + max_local_cpu_size = 6.0 + nixl_buffer_size = 2079377920 + nixl_buffer_device = "cpu" + + [cmd_args.lmcache.extra_config] + enable_nixl_storage = false + nixl_backend = "POSIX" + nixl_path = "{storage_cache_dir}" + nixl_pool_size = 2048 For an example that uses test-in-scenario mode, see ``conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml``. Because the test is fully defined inside the scenario, @@ -169,10 +170,12 @@ If the selected LMCache mode needs a controller, CloudAI can start one on the fr .. code-block:: toml [cmd_args.lmcache_controller] - cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-port 9001" + cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-ports {\"pull\":8300,\"reply\":8400}" -This only launches the process. For disaggregated or multi-node runs, the LMCache YAML still needs a ``controller_url`` -that resolves to the frontend node from every worker, such as ``"{frontend_node}:9001"``. +This only launches the process. For disaggregated or multi-node runs, the LMCache YAML still needs controller addresses +that resolve to the frontend node from every worker. With the default controller monitor ports, use +``controller_pull_url = "{frontend_node}:8300"`` and ``controller_reply_url = "{frontend_node}:8400"``. The +``lmcache_worker_ports`` list must match the number of worker ranks. Semantic Degradation With AIPerf Accuracy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/cloudai/workloads/ai_dynamo/__init__.py b/src/cloudai/workloads/ai_dynamo/__init__.py index 61ab6f120..5e430068d 100644 --- a/src/cloudai/workloads/ai_dynamo/__init__.py +++ b/src/cloudai/workloads/ai_dynamo/__init__.py @@ -15,6 +15,7 @@ # limitations under the License. from .ai_dynamo import ( + LMCACHE_CONFIG_BACKUP_FILE_NAME, LMCACHE_CONFIG_FILE_NAME, AIDynamoArgs, AIDynamoCmdArgs, @@ -31,6 +32,7 @@ from .slurm_command_gen_strategy import AIDynamoSlurmCommandGenStrategy __all__ = [ + "LMCACHE_CONFIG_BACKUP_FILE_NAME", "LMCACHE_CONFIG_FILE_NAME", "AIDynamoArgs", "AIDynamoCmdArgs", diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py index 2771f5de6..4caf59e37 100644 --- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py +++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py @@ -45,6 +45,7 @@ AIPERF_ACCURACY_ARTIFACTS_DIR = "aiperf_accuracy_artifacts" AIPERF_ACCURACY_RESULTS_CSV = "accuracy_results.csv" LMCACHE_CONFIG_FILE_NAME = "lmcache-config.yaml" +LMCACHE_CONFIG_BACKUP_FILE_NAME = "lmcache-config.original.yaml" class Args(BaseModel): diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh b/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh index 74d84c855..52e975850 100644 --- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh +++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh @@ -54,7 +54,7 @@ dynamo_args["frontend-node"]="" dynamo_args["etcd-cmd"]="etcd --log-level debug" dynamo_args["nats-cmd"]="nats-server -js" -dynamo_args["worker-error-pattern"]="zmq.error.ZMQError:.Address.already.in.use|ERROR.core.run_engine_core:.EngineCore.failed.to.start|ERROR.multiproc_executor.worker_busy_loop:.WorkerProc.hit.an.exception|ValueError:.a.python.*async.generator:.EngineDeadError:.EngineCore.encountered.an.issue|ZeroDivisionError:.integer.division.or.modulo.by.zero|ERROR.core.run_engine_core:.EngineCore.encountered.a.fatal.error|Exception:.Failed.to.fetch.model|ERROR.*Engine.core.proc.EngineCore_.*died.unexpectedly|RuntimeError:.Engine.core.initialization.failed." +dynamo_args["worker-error-pattern"]="zmq.error.ZMQError:.Address.already.in.use|ERROR.core.run_engine_core:.EngineCore.failed.to.start|ERROR.multiproc_executor.worker_busy_loop:.WorkerProc.hit.an.exception|ValueError:.a.python.*async.generator:.EngineDeadError:.EngineCore.encountered.an.issue|ZeroDivisionError:.integer.division.or.modulo.by.zero|ERROR.core.run_engine_core:.EngineCore.encountered.a.fatal.error|Exception:.Failed.to.fetch.model|ERROR.*Engine.core.proc.EngineCore_.*died.unexpectedly|RuntimeError:.Engine.core.initialization.failed.|pydantic_core._pydantic_core.ValidationError|Unsupported.connector.type" # sglang_dsr1-specific optional ports. Ignored by vllm. dynamo_args["sgl-http-port"]=9001 @@ -970,7 +970,7 @@ function render_lmcache_config() mkdir -p "$storage_cache_dir" chmod 755 "$storage_cache_dir" - local rendered_config="${RESULTS_DIR}/lmcache-config-${SLURM_NODEID:-0}.yaml" + local rendered_config="${LMCACHE_CONFIG_FILE}.tmp.${SLURM_NODEID:-0}" if ! FRONTEND_NODE="$frontend_node" \ FRONTEND_IP="$frontend_ip" \ RESULTS_DIR="$RESULTS_DIR" \ @@ -1004,7 +1004,7 @@ PY exit 1 fi - export LMCACHE_CONFIG_FILE="$rendered_config" + mv "$rendered_config" "$LMCACHE_CONFIG_FILE" log "Rendered LMCache config file: ${LMCACHE_CONFIG_FILE}" } diff --git a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py index 136ab6afc..861a4c469 100644 --- a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py +++ b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py @@ -25,7 +25,7 @@ from cloudai.core import File, GitRepo from cloudai.systems.slurm import SlurmCommandGenStrategy -from .ai_dynamo import LMCACHE_CONFIG_FILE_NAME, AIDynamoTestDefinition +from .ai_dynamo import LMCACHE_CONFIG_BACKUP_FILE_NAME, LMCACHE_CONFIG_FILE_NAME, AIDynamoTestDefinition class AIDynamoSlurmCommandGenStrategy(SlurmCommandGenStrategy): @@ -105,8 +105,9 @@ def _prepare_lmcache_config(self): return self.test_run.output_path.mkdir(parents=True, exist_ok=True) - config_path = self.test_run.output_path / LMCACHE_CONFIG_FILE_NAME - config_path.write_text(yaml.safe_dump(self.td.cmd_args.lmcache, sort_keys=False)) + config = yaml.safe_dump(self.td.cmd_args.lmcache, sort_keys=False) + (self.test_run.output_path / LMCACHE_CONFIG_FILE_NAME).write_text(config) + (self.test_run.output_path / LMCACHE_CONFIG_BACKUP_FILE_NAME).write_text(config) def _gen_script_args(self, td: AIDynamoTestDefinition) -> List[str]: self._prepare_lmcache_config() diff --git a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py index 192c16146..41d6dae74 100644 --- a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py +++ b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py @@ -25,6 +25,7 @@ from cloudai.core import GitRepo from cloudai.systems.slurm import SlurmSystem from cloudai.workloads.ai_dynamo import ( + LMCACHE_CONFIG_BACKUP_FILE_NAME, LMCACHE_CONFIG_FILE_NAME, AIDynamoArgs, AIDynamoCmdArgs, @@ -234,7 +235,9 @@ def test_gen_script_args_writes_lmcache_object_as_yaml(strategy: AIDynamoSlurmCo td.cmd_args.lmcache = { "chunk_size": 512, "local_cpu": True, - "controller_url": "{frontend_node}:9001", + "controller_pull_url": "{frontend_node}:8300", + "controller_reply_url": "{frontend_node}:8400", + "lmcache_worker_ports": [8788, 8789, 8790, 8791], "extra_config": { "enable_nixl_storage": False, "nixl_backend": "POSIX", @@ -245,17 +248,22 @@ def test_gen_script_args_writes_lmcache_object_as_yaml(strategy: AIDynamoSlurmCo result = strategy._gen_script_args(td) config_path = strategy.test_run.output_path / LMCACHE_CONFIG_FILE_NAME + backup_path = strategy.test_run.output_path / LMCACHE_CONFIG_BACKUP_FILE_NAME config = yaml.safe_load(config_path.read_text()) + backup_config = yaml.safe_load(backup_path.read_text()) assert ( strategy.final_env_vars["LMCACHE_CONFIG_FILE"] == f"{strategy.CONTAINER_MOUNT_OUTPUT}/{LMCACHE_CONFIG_FILE_NAME}" ) assert config["chunk_size"] == 512 assert config["local_cpu"] is True - assert config["controller_url"] == "{frontend_node}:9001" + assert config["controller_pull_url"] == "{frontend_node}:8300" + assert config["controller_reply_url"] == "{frontend_node}:8400" + assert config["lmcache_worker_ports"] == [8788, 8789, 8790, 8791] assert config["extra_config"]["enable_nixl_storage"] is False assert config["extra_config"]["nixl_backend"] == "POSIX" assert config["extra_config"]["nixl_path"] == "{storage_cache_dir}" + assert backup_config == config assert not any(arg.startswith("--lmcache") for arg in result) From f378714a7b6ae7791261b75cab255b4f871c5630 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Thu, 28 May 2026 18:07:59 -0700 Subject: [PATCH 09/15] bla --- .../ai_dynamo/test_scenario/vllm_lmcache.toml | 2 +- src/cloudai/_core/test_scenario.py | 11 ++++++++++- src/cloudai/models/workload.py | 15 +++++++++++---- src/cloudai/workloads/ai_dynamo/ai_dynamo.py | 3 ++- .../ai_dynamo/test_command_gen_strategy_slurm.py | 8 ++++++++ 5 files changed, 32 insertions(+), 7 deletions(-) diff --git a/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml index 2db5adbf4..407ea0dd9 100644 --- a/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml +++ b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml @@ -23,7 +23,7 @@ name = "vllm-disagg-lmcache-controller" description = "Self-contained AIDynamo scenario wiring vLLM disaggregated inference, LMCache config propagation, and LMCache controller launch." test_template_name = "AIDynamo" time_limit = "00:10:00" -extra_container_mounts = ["/run/udev:/run/udev", "/tmp:/tmp"] +extra_container_mounts = ["/run/udev:/run/udev", "/tmp/cloudai:/tmp/"] [Tests.cmd_args] docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.1.1" diff --git a/src/cloudai/_core/test_scenario.py b/src/cloudai/_core/test_scenario.py index 4c768158d..c5e35244d 100644 --- a/src/cloudai/_core/test_scenario.py +++ b/src/cloudai/_core/test_scenario.py @@ -146,8 +146,17 @@ def param_space(self) -> dict[str, Any]: cmd_args_dict = flatten_dict(self.test.cmd_args.model_dump()) extra_env_vars_dict = self.test.extra_env_vars + def is_excluded_cmd_arg(key: str) -> bool: + return any( + key == excluded or key.startswith(f"{excluded}.") for excluded in self.test.dse_excluded_cmd_args + ) + action_space: dict[str, Any] = { - **{key: value for key, value in cmd_args_dict.items() if isinstance(value, list)}, + **{ + key: value + for key, value in cmd_args_dict.items() + if isinstance(value, list) and not is_excluded_cmd_arg(key) + }, **{f"extra_env_vars.{key}": value for key, value in extra_env_vars_dict.items() if isinstance(value, list)}, } if isinstance(self.num_nodes, list): diff --git a/src/cloudai/models/workload.py b/src/cloudai/models/workload.py index 34965454a..e3fa33a1b 100644 --- a/src/cloudai/models/workload.py +++ b/src/cloudai/models/workload.py @@ -16,7 +16,7 @@ from abc import ABC from dataclasses import dataclass -from typing import Any, Dict, List, Optional, Union +from typing import Any, ClassVar, Dict, List, Optional, Union from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator from typing_extensions import Self @@ -93,6 +93,7 @@ class TestDefinition(BaseModel, ABC): __test__ = False model_config = ConfigDict(extra="forbid") + dse_excluded_cmd_args: ClassVar[tuple[str, ...]] = () name: str description: str @@ -131,10 +132,16 @@ def constraint_check(self, tr: TestRun, system: Optional[System]) -> bool: @property def is_dse_job(self) -> bool: - def check_dict(d: dict) -> bool: + def is_excluded(path: str) -> bool: + return any(path == excluded or path.startswith(f"{excluded}.") for excluded in self.dse_excluded_cmd_args) + + def check_dict(d: dict, parent_key: str = "") -> bool: if isinstance(d, dict): - for value in d.values(): - if isinstance(value, list) or (isinstance(value, dict) and check_dict(value)): + for key, value in d.items(): + path = f"{parent_key}.{key}" if parent_key else key + if is_excluded(path): + continue + if isinstance(value, list) or (isinstance(value, dict) and check_dict(value, path)): return True return False diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py index 4caf59e37..0777e60f1 100644 --- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py +++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py @@ -17,7 +17,7 @@ import csv import logging from pathlib import Path -from typing import Literal, Optional, cast +from typing import ClassVar, Literal, Optional, cast from pydantic import ( AliasChoices, @@ -354,6 +354,7 @@ class AIDynamoTestDefinition(TestDefinition): """Test definition for AI Dynamo.""" model_config = ConfigDict(extra="forbid") + dse_excluded_cmd_args: ClassVar[tuple[str, ...]] = ("lmcache",) cmd_args: AIDynamoCmdArgs _docker_image: Optional[DockerImage] = None diff --git a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py index 41d6dae74..908b8701e 100644 --- a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py +++ b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py @@ -267,6 +267,14 @@ def test_gen_script_args_writes_lmcache_object_as_yaml(strategy: AIDynamoSlurmCo assert not any(arg.startswith("--lmcache") for arg in result) +def test_lmcache_list_values_do_not_create_dse_space(test_run: TestRun) -> None: + td = cast(AIDynamoTestDefinition, test_run.test) + td.cmd_args.lmcache = {"lmcache_worker_ports": [8788, 8789, 8790, 8791]} + + assert test_run.is_dse_job is False + assert "lmcache.lmcache_worker_ports" not in test_run.param_space + + def test_gen_script_args_passes_lmcache_controller_cmd(strategy: AIDynamoSlurmCommandGenStrategy) -> None: td = cast(AIDynamoTestDefinition, strategy.test_run.test) cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-port 9001" From 3248fe142e227844c52f9f2dd73e0890acb65949 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Thu, 28 May 2026 18:19:13 -0700 Subject: [PATCH 10/15] add accuracy test --- .../ai_dynamo/test_scenario/vllm_lmcache.toml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml index 407ea0dd9..a764f70c3 100644 --- a/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml +++ b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml @@ -88,6 +88,24 @@ extra_container_mounts = ["/run/udev:/run/udev", "/tmp/cloudai:/tmp/"] request-count = 50 synthetic-input-tokens-mean = 300 + [Tests.cmd_args.aiperf_accuracy] + entrypoint = "aiperf profile" + setup-cmd = "python -m pip install --break-system-packages --upgrade aiperf==0.8.0" + cli = ''' +--model {model} +--url {url} +--endpoint-type chat +--streaming +--artifact-dir {artifact_dir} +--no-server-metrics +--accuracy-benchmark mmlu +--accuracy-n-shots 5 +--accuracy-tasks abstract_algebra +--concurrency 10 +--extra-inputs '{"temperature":0,"chat_template_kwargs":{"enable_thinking":false}}' +--num-requests 100 +''' + [Tests.extra_env_vars] UCX_LOG_LEVEL = "warn" HF_HUB_OFFLINE = "0" From e34a775eee75cdb1d5e24db08e8ad60b565cf57c Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Fri, 29 May 2026 09:28:40 -0700 Subject: [PATCH 11/15] exclude dse arg feature --- .../ai_dynamo/test_scenario/vllm_lmcache.toml | 1 + src/cloudai/_core/test_scenario.py | 14 ++--- src/cloudai/models/scenario.py | 2 + src/cloudai/models/workload.py | 38 +++++++++--- src/cloudai/workloads/ai_dynamo/ai_dynamo.py | 4 +- tests/test_cloudaigym.py | 16 +++++ tests/test_test_scenario.py | 58 ++++++++++++++++++- .../test_command_gen_strategy_slurm.py | 15 ++++- 8 files changed, 126 insertions(+), 22 deletions(-) diff --git a/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml index a764f70c3..7e884a308 100644 --- a/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml +++ b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml @@ -24,6 +24,7 @@ description = "Self-contained AIDynamo scenario wiring vLLM disaggregated infere test_template_name = "AIDynamo" time_limit = "00:10:00" extra_container_mounts = ["/run/udev:/run/udev", "/tmp/cloudai:/tmp/"] +dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports"] [Tests.cmd_args] docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.1.1" diff --git a/src/cloudai/_core/test_scenario.py b/src/cloudai/_core/test_scenario.py index c5e35244d..6c1d0ee3c 100644 --- a/src/cloudai/_core/test_scenario.py +++ b/src/cloudai/_core/test_scenario.py @@ -146,16 +146,11 @@ def param_space(self) -> dict[str, Any]: cmd_args_dict = flatten_dict(self.test.cmd_args.model_dump()) extra_env_vars_dict = self.test.extra_env_vars - def is_excluded_cmd_arg(key: str) -> bool: - return any( - key == excluded or key.startswith(f"{excluded}.") for excluded in self.test.dse_excluded_cmd_args - ) - action_space: dict[str, Any] = { **{ key: value for key, value in cmd_args_dict.items() - if isinstance(value, list) and not is_excluded_cmd_arg(key) + if isinstance(value, list) and not self.test.is_dse_excluded_arg(key) }, **{f"extra_env_vars.{key}": value for key, value in extra_env_vars_dict.items() if isinstance(value, list)}, } @@ -192,8 +187,11 @@ def apply_params_set(self, action: dict[str, Any]) -> "TestRun": attrs = key.split(".") obj = tdef.cmd_args for attr in attrs[:-1]: - obj = getattr(obj, attr) - setattr(obj, attrs[-1], value) + obj = obj[attr] if isinstance(obj, dict) else getattr(obj, attr) + if isinstance(obj, dict): + obj[attrs[-1]] = value + else: + setattr(obj, attrs[-1], value) type(tdef)(**tdef.model_dump()) # trigger validation diff --git a/src/cloudai/models/scenario.py b/src/cloudai/models/scenario.py index beeb84244..57234df23 100644 --- a/src/cloudai/models/scenario.py +++ b/src/cloudai/models/scenario.py @@ -93,6 +93,7 @@ class TestRunModel(BaseModel): description: Optional[str] = None test_template_name: Optional[str] = None cmd_args: Optional[CmdArgs] = None + dse_excluded_args: Optional[list[str]] = None extra_env_vars: dict[str, str | list[str]] | None = None extra_container_mounts: Optional[list[str]] = None git_repos: Optional[list[GitRepo]] = None @@ -114,6 +115,7 @@ def tdef_model_dump(self, by_alias: bool) -> dict: "agent_metrics": self.agent_metrics if "agent_metrics" in self.model_fields_set else None, "agent_reward_function": self.agent_reward_function, "agent_config": self.agent_config, + "dse_excluded_args": self.dse_excluded_args, "extra_container_mounts": self.extra_container_mounts, "extra_env_vars": self.extra_env_vars if self.extra_env_vars else None, "cmd_args": self.cmd_args.model_dump(by_alias=by_alias) if self.cmd_args else None, diff --git a/src/cloudai/models/workload.py b/src/cloudai/models/workload.py index e3fa33a1b..8b981d8ea 100644 --- a/src/cloudai/models/workload.py +++ b/src/cloudai/models/workload.py @@ -16,7 +16,7 @@ from abc import ABC from dataclasses import dataclass -from typing import Any, ClassVar, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Union from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator from typing_extensions import Self @@ -93,12 +93,11 @@ class TestDefinition(BaseModel, ABC): __test__ = False model_config = ConfigDict(extra="forbid") - dse_excluded_cmd_args: ClassVar[tuple[str, ...]] = () - name: str description: str test_template_name: str cmd_args: Any + dse_excluded_args: list[str] = Field(default_factory=list) extra_env_vars: dict[str, Union[str, List[str]]] = {} extra_cmd_args: dict[str, str] = {} extra_container_mounts: list[str] = [] @@ -116,6 +115,11 @@ class TestDefinition(BaseModel, ABC): def cmd_args_dict(self) -> Dict[str, Union[str, List[str]]]: return self.cmd_args.model_dump() + def is_dse_excluded_arg(self, path: str) -> bool: + """Return whether a dot-separated cmd_args path should be ignored by DSE.""" + path = f"cmd_args.{path}" + return any(path == excluded or path.startswith(f"{excluded}.") for excluded in self.dse_excluded_args) + @property def extra_args_str(self) -> str: parts = [] @@ -132,14 +136,11 @@ def constraint_check(self, tr: TestRun, system: Optional[System]) -> bool: @property def is_dse_job(self) -> bool: - def is_excluded(path: str) -> bool: - return any(path == excluded or path.startswith(f"{excluded}.") for excluded in self.dse_excluded_cmd_args) - def check_dict(d: dict, parent_key: str = "") -> bool: if isinstance(d, dict): for key, value in d.items(): path = f"{parent_key}.{key}" if parent_key else key - if is_excluded(path): + if self.is_dse_excluded_arg(path): continue if isinstance(value, list) or (isinstance(value, dict) and check_dict(value, path)): return True @@ -147,6 +148,29 @@ def check_dict(d: dict, parent_key: str = "") -> bool: return check_dict(self.cmd_args_dict) or check_dict(self.extra_env_vars) + @field_validator("dse_excluded_args", mode="before") + @classmethod + def normalize_dse_excluded_args(cls, value: Any) -> list[str]: + if value is None: + return [] + if isinstance(value, str): + value = [value] + + normalized = [] + for prefix in value: + if not isinstance(prefix, str): + raise ValueError("DSE excluded cmd_args prefixes must be strings.") + + prefix = prefix.strip() + if not prefix.startswith("cmd_args."): + raise ValueError(f"DSE excluded arg must start with 'cmd_args.': {prefix!r}") + if prefix == "cmd_args." or prefix.endswith(".") or ".." in prefix: + raise ValueError(f"Invalid DSE excluded cmd_args prefix: {prefix!r}") + + normalized.append(prefix) + + return normalized + def was_run_successful(self, tr: TestRun) -> JobStatusResult: return JobStatusResult(is_successful=True) diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py index 0777e60f1..7f8da4165 100644 --- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py +++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py @@ -17,7 +17,7 @@ import csv import logging from pathlib import Path -from typing import ClassVar, Literal, Optional, cast +from typing import Literal, Optional, cast from pydantic import ( AliasChoices, @@ -354,8 +354,6 @@ class AIDynamoTestDefinition(TestDefinition): """Test definition for AI Dynamo.""" model_config = ConfigDict(extra="forbid") - dse_excluded_cmd_args: ClassVar[tuple[str, ...]] = ("lmcache",) - cmd_args: AIDynamoCmdArgs _docker_image: Optional[DockerImage] = None script: File = File(Path(__file__).parent.parent / "ai_dynamo/ai_dynamo.sh") diff --git a/tests/test_cloudaigym.py b/tests/test_cloudaigym.py index ecb9eb0a5..fdda413e4 100644 --- a/tests/test_cloudaigym.py +++ b/tests/test_cloudaigym.py @@ -213,6 +213,22 @@ def test_action_space(nemorun: NeMoRunTestDefinition, setup_env: tuple[TestRun, assert action_space["NUM_NODES"] == tr.num_nodes +def test_action_space_excludes_configured_cmd_arg_prefix( + nemorun: NeMoRunTestDefinition, setup_env: tuple[TestRun, BaseRunner] +): + tr, _ = setup_env + nemorun.cmd_args.trainer = Trainer( + max_steps=[1000, 2000], strategy=TrainerStrategy(tensor_model_parallel_size=[1, 2]) + ) + nemorun.dse_excluded_args = ["cmd_args.trainer.strategy"] + tr.test = nemorun + + action_space = tr.param_space + + assert action_space["trainer.max_steps"] == [1000, 2000] + assert "trainer.strategy.tensor_model_parallel_size" not in action_space + + @pytest.mark.parametrize("num_nodes", (1, [1, 2], [3])) def test_all_combinations(nemorun: NeMoRunTestDefinition, setup_env: tuple[TestRun, BaseRunner], num_nodes: int): tr, _ = setup_env diff --git a/tests/test_test_scenario.py b/tests/test_test_scenario.py index 8b84a01db..8b390e8a1 100644 --- a/tests/test_test_scenario.py +++ b/tests/test_test_scenario.py @@ -21,7 +21,7 @@ import pytest import toml -from cloudai._core.exceptions import MissingTestError +from cloudai._core.exceptions import MissingTestError, TestConfigParsingError from cloudai.core import ( CmdArgs, GitRepo, @@ -615,6 +615,62 @@ def test_agent_config_is_merged_with_scenario_override( "start_action": "random", } + def test_dse_excluded_args_can_be_set_from_scenario_toml( + self, test_scenario_parser: TestScenarioParser, slurm_system: SlurmSystem + ): + test_scenario_parser.test_mapping = { + "nccl": NCCLTestDefinition( + name="nccl", + description="desc", + test_template_name="NcclTest", + cmd_args=NCCLCmdArgs(docker_image_url="fake://url/nccl"), + ) + } + model = TestScenarioModel.model_validate( + toml.loads( + """ + name = "test" + + [[Tests]] + id = "1" + test_name = "nccl" + dse_excluded_args = ["cmd_args.foo", "cmd_args.bar.baz"] + """ + ) + ) + tdef = test_scenario_parser._prepare_tdef(model.tests[0]) + + assert tdef.dse_excluded_args == ["cmd_args.foo", "cmd_args.bar.baz"] + + def test_dse_excluded_args_must_use_cmd_args_prefix( + self, test_scenario_parser: TestScenarioParser, slurm_system: SlurmSystem + ): + test_scenario_parser.test_mapping = { + "nccl": NCCLTestDefinition( + name="nccl", + description="desc", + test_template_name="NcclTest", + cmd_args=NCCLCmdArgs(docker_image_url="fake://url/nccl"), + ) + } + model = TestScenarioModel.model_validate( + toml.loads( + """ + name = "test" + + [[Tests]] + id = "1" + test_name = "nccl" + dse_excluded_args = ["foo"] + """ + ) + ) + + with pytest.raises(TestConfigParsingError) as excinfo: + test_scenario_parser._prepare_tdef(model.tests[0]) + + assert "DSE excluded arg must start with 'cmd_args.'" in str(excinfo.value.__cause__) + class TestReporters: def test_default(self): diff --git a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py index 908b8701e..0e2f23061 100644 --- a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py +++ b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py @@ -267,13 +267,22 @@ def test_gen_script_args_writes_lmcache_object_as_yaml(strategy: AIDynamoSlurmCo assert not any(arg.startswith("--lmcache") for arg in result) -def test_lmcache_list_values_do_not_create_dse_space(test_run: TestRun) -> None: +def test_lmcache_config_supports_dse_with_excluded_prefix(test_run: TestRun) -> None: td = cast(AIDynamoTestDefinition, test_run.test) - td.cmd_args.lmcache = {"lmcache_worker_ports": [8788, 8789, 8790, 8791]} + td.dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports"] + td.cmd_args.lmcache = { + "chunk_size": [256, 512], + "lmcache_worker_ports": [8788, 8789, 8790, 8791], + } - assert test_run.is_dse_job is False + assert test_run.is_dse_job is True + assert test_run.param_space["lmcache.chunk_size"] == [256, 512] assert "lmcache.lmcache_worker_ports" not in test_run.param_space + new_test_run = test_run.apply_params_set({"lmcache.chunk_size": 512}) + + assert cast(AIDynamoTestDefinition, new_test_run.test).cmd_args.lmcache["chunk_size"] == 512 # type: ignore + def test_gen_script_args_passes_lmcache_controller_cmd(strategy: AIDynamoSlurmCommandGenStrategy) -> None: td = cast(AIDynamoTestDefinition, strategy.test_run.test) From 70cc2881797fd00df7eacce47d4db85bd032a524 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Fri, 29 May 2026 09:47:37 -0700 Subject: [PATCH 12/15] docs for the dse_excluded_args feature --- doc/USER_GUIDE.rst | 25 +++++++++++++++++++++++++ doc/workloads/ai_dynamo.rst | 4 ++++ 2 files changed, 29 insertions(+) diff --git a/doc/USER_GUIDE.rst b/doc/USER_GUIDE.rst index 5ca678531..966fadb9f 100644 --- a/doc/USER_GUIDE.rst +++ b/doc/USER_GUIDE.rst @@ -206,6 +206,31 @@ action, typically seeded by ``random_seed``. Custom agents may extend the ``BaseAgentConfig`` and offer more parameters to configure. +DSE parameter exclusions +~~~~~~~~~~~~~~~~~~~~~~~~ + +CloudAI builds the DSE parameter space implicitly from list-valued fields under ``cmd_args``, list-valued +``extra_env_vars``, and list-valued ``num_nodes``. If a list-valued ``cmd_args`` field is configuration data rather than +a sweep dimension, exclude it with ``dse_excluded_args`` in the test or scenario definition. + +Entries in ``dse_excluded_args`` must be dot-separated paths that start with ``cmd_args.``. Each entry excludes that +field and any nested fields below it from DSE parameter discovery: + +.. code-block:: toml + + [[Tests]] + id = "Tests.1" + test_name = "my_test" + dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports"] + + [Tests.cmd_args.lmcache] + chunk_size = [256, 512] + lmcache_worker_ports = [8788, 8789, 8790, 8791] + +In this example, ``cmd_args.lmcache.chunk_size`` is still swept, while +``cmd_args.lmcache.lmcache_worker_ports`` is treated as a single configuration value. The exclusion mechanism currently +applies only to ``cmd_args`` paths; it does not exclude ``extra_env_vars`` or ``num_nodes`` from DSE. + Metric errors and report strategies ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/workloads/ai_dynamo.rst b/doc/workloads/ai_dynamo.rst index 77e13e5ed..7dbba92b6 100644 --- a/doc/workloads/ai_dynamo.rst +++ b/doc/workloads/ai_dynamo.rst @@ -149,6 +149,10 @@ For an example that uses test-in-scenario mode, see uv run cloudai run --system-config \ --test-scenario conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml +The example sets ``dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports"]`` because +``lmcache_worker_ports`` is a list-valued LMCache setting, not a DSE sweep dimension. Other list-valued LMCache fields +can still be swept unless their ``cmd_args.`` path is also excluded. + Alternatively, mount your own LMCache YAML file with ``extra_container_mounts`` and set ``LMCACHE_CONFIG_FILE`` through ``extra_env_vars``: From c1c6aa004007bf00ca6d0bc51e3ccdda7d7dde1f Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Fri, 29 May 2026 10:20:29 -0700 Subject: [PATCH 13/15] make lmcache vllm conf nicer --- conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml index 7e884a308..564311240 100644 --- a/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml +++ b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml @@ -23,11 +23,12 @@ name = "vllm-disagg-lmcache-controller" description = "Self-contained AIDynamo scenario wiring vLLM disaggregated inference, LMCache config propagation, and LMCache controller launch." test_template_name = "AIDynamo" time_limit = "00:10:00" -extra_container_mounts = ["/run/udev:/run/udev", "/tmp/cloudai:/tmp/"] +extra_container_mounts = ["/run/udev:/run/udev"] dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports"] [Tests.cmd_args] docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.1.1" + # storage_cache_dir = "/lustre/.../install/tmp" workloads = "aiperf.sh" [Tests.cmd_args.lmcache] From 0468e650d9d14d3b00377acca846b908fbad2f0e Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Fri, 29 May 2026 12:00:46 -0700 Subject: [PATCH 14/15] make aidynamo test more consistent --- tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py index 0e2f23061..544bb064a 100644 --- a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py +++ b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py @@ -264,7 +264,7 @@ def test_gen_script_args_writes_lmcache_object_as_yaml(strategy: AIDynamoSlurmCo assert config["extra_config"]["nixl_backend"] == "POSIX" assert config["extra_config"]["nixl_path"] == "{storage_cache_dir}" assert backup_config == config - assert not any(arg.startswith("--lmcache") for arg in result) + assert "--lmcache" not in result def test_lmcache_config_supports_dse_with_excluded_prefix(test_run: TestRun) -> None: From 1ded4b55375bacb8142b4812da4baa2d149cc418 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Fri, 29 May 2026 18:39:20 -0700 Subject: [PATCH 15/15] fix vllm config --- conf/experimental/ai_dynamo/test/vllm.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/conf/experimental/ai_dynamo/test/vllm.toml b/conf/experimental/ai_dynamo/test/vllm.toml index 8a5f3b939..85f7d353f 100644 --- a/conf/experimental/ai_dynamo/test/vllm.toml +++ b/conf/experimental/ai_dynamo/test/vllm.toml @@ -38,6 +38,7 @@ workloads = "aiperf.sh" tensor-parallel-size = 8 pipeline-parallel-size = 1 data-parallel-size = 1 + kv-transfer-config = '{"kv_connector":"NixlConnector","kv_role":"kv_both"}' [cmd_args.dynamo.decode_worker] num-nodes = 1 @@ -50,6 +51,7 @@ workloads = "aiperf.sh" tensor-parallel-size = 8 pipeline-parallel-size = 1 data-parallel-size = 1 + kv-transfer-config = '{"kv_connector":"NixlConnector","kv_role":"kv_both"}' [cmd_args.lmcache_controller] cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-port 9001"