diff --git a/conf/experimental/ai_dynamo/test/sglang.toml b/conf/experimental/ai_dynamo/test/sglang.toml index 37b2c392b..67fc999f0 100644 --- a/conf/experimental/ai_dynamo/test/sglang.toml +++ b/conf/experimental/ai_dynamo/test/sglang.toml @@ -17,7 +17,7 @@ name = "sglang" description = "sglang backend" test_template_name = "AIDynamo" -extra_container_mounts = ["/run/udev:/run/udev", "/tmp:/tmp"] +extra_container_mounts = ["/run/udev:/run/udev"] [cmd_args] docker_image_url = "nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.1.1" @@ -59,23 +59,8 @@ workloads = "aiperf.sh" host = "0.0.0.0" disaggregation-transfer-backend = "nixl" - [cmd_args.lmcache] - controller_cmd = "lmcache_controller --host localhost --port 9000 --monitor-port 9001" - - [cmd_args.lmcache.args] - chunk_size = 256 - local_cpu = false - nixl_buffer_size = 10737418240 - nixl_buffer_device = "cuda" - extra_config_enable_nixl_storage = true - extra_config_nixl_backend = "GDS_MT" - extra_config_nixl_file_pool_size = 64 - - enable_controller = true - lmcache_instance_id = "lmcache_default_instance" - controller_url = "localhost:9001" - lmcache_worker_port = 8788 - distributed_url = "localhost:8789" + [cmd_args.lmcache_controller] + cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-port 9001" [cmd_args.genai_perf] cmd = "genai-perf profile" diff --git a/conf/experimental/ai_dynamo/test/vllm.toml b/conf/experimental/ai_dynamo/test/vllm.toml index 583d11a88..85f7d353f 100644 --- a/conf/experimental/ai_dynamo/test/vllm.toml +++ b/conf/experimental/ai_dynamo/test/vllm.toml @@ -17,7 +17,7 @@ name = "vLLM" description = "vLLM backend" test_template_name = "AIDynamo" -extra_container_mounts = ["/run/udev:/run/udev", "/tmp:/tmp"] +extra_container_mounts = ["/run/udev:/run/udev"] [cmd_args] docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.1.1" @@ -53,23 +53,8 @@ workloads = "aiperf.sh" data-parallel-size = 1 kv-transfer-config = '{"kv_connector":"NixlConnector","kv_role":"kv_both"}' - 
[cmd_args.lmcache] - controller_cmd = "lmcache_controller --host localhost --port 9000 --monitor-port 9001" - - [cmd_args.lmcache.args] - chunk_size = 256 - local_cpu = false - nixl_buffer_size = 10737418240 - nixl_buffer_device = "cuda" - extra_config_enable_nixl_storage = true - extra_config_nixl_backend = "GDS_MT" - extra_config_nixl_file_pool_size = 64 - - enable_controller = true - lmcache_instance_id = "lmcache_default_instance" - controller_url = "localhost:9001" - lmcache_worker_port = 8788 - distributed_url = "localhost:8789" + [cmd_args.lmcache_controller] + cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-port 9001" [cmd_args.genai_perf] cmd = "genai-perf profile" diff --git a/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml new file mode 100644 index 000000000..564311240 --- /dev/null +++ b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml @@ -0,0 +1,117 @@ +# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name = "dynamo-vllm-lmcache" +job_status_check = false + +[[Tests]] +id = "test.disagg.lmcache-controller" +name = "vllm-disagg-lmcache-controller" +description = "Self-contained AIDynamo scenario wiring vLLM disaggregated inference, LMCache config propagation, and LMCache controller launch." +test_template_name = "AIDynamo" +time_limit = "00:10:00" +extra_container_mounts = ["/run/udev:/run/udev"] +dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports"] + + [Tests.cmd_args] + docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.1.1" + # storage_cache_dir = "/lustre/.../install/tmp" + workloads = "aiperf.sh" + + [Tests.cmd_args.lmcache] + enable_controller = true + chunk_size = 256 + local_cpu = true + lmcache_instance_id = "lmcache_default_instance" + controller_pull_url = "{frontend_node}:8300" + controller_reply_url = "{frontend_node}:8400" + lmcache_worker_ports = [8788, 8789, 8790, 8791] + max_local_cpu_size = 6.0 + nixl_buffer_size = 2079377920 + nixl_buffer_device = "cpu" + + [Tests.cmd_args.lmcache.extra_config] + enable_nixl_storage = false + nixl_backend = "POSIX" + nixl_path = "{storage_cache_dir}" + nixl_pool_size = 2048 + + [Tests.cmd_args.lmcache_controller] + cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-ports {\"pull\":8300,\"reply\":8400}" + + [Tests.cmd_args.dynamo] + backend = "vllm" + model = "Qwen/Qwen3-0.6B" + + [Tests.cmd_args.dynamo.prefill_worker] + num-nodes = 1 + cmd = 'python3 -m dynamo.vllm --is-prefill-worker' + worker-initialized-regex = 'VllmWorker.*has.been.initialized' + extra-args = "--no-enable-expert-parallel" + + [Tests.cmd_args.dynamo.prefill_worker.args] + gpu-memory-utilization = 0.8 + kv-transfer-config = '{"kv_connector":"MultiConnector","kv_role":"kv_both","kv_connector_extra_config":{"connectors":[{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"},{"kv_connector":"NixlConnector","kv_role":"kv_both"}]}}' + tensor-parallel-size = 4 + pipeline-parallel-size = 1 + data-parallel-size = 
1 + + [Tests.cmd_args.dynamo.decode_worker] + num-nodes = 1 + cmd = 'python3 -m dynamo.vllm' + worker-initialized-regex = 'VllmWorker.*has.been.initialized' + extra-args = "--no-enable-expert-parallel" + + [Tests.cmd_args.dynamo.decode_worker.args] + gpu-memory-utilization = 0.8 + kv-transfer-config = '{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}' + tensor-parallel-size = 4 + pipeline-parallel-size = 1 + data-parallel-size = 1 + + [Tests.cmd_args.aiperf] + [Tests.cmd_args.aiperf.args] + concurrency = 2 + extra-inputs = '{"min_tokens":10}' + output-tokens-mean = 500 + request-count = 50 + synthetic-input-tokens-mean = 300 + + [Tests.cmd_args.aiperf_accuracy] + entrypoint = "aiperf profile" + setup-cmd = "python -m pip install --break-system-packages --upgrade aiperf==0.8.0" + cli = ''' +--model {model} +--url {url} +--endpoint-type chat +--streaming +--artifact-dir {artifact_dir} +--no-server-metrics +--accuracy-benchmark mmlu +--accuracy-n-shots 5 +--accuracy-tasks abstract_algebra +--concurrency 10 +--extra-inputs '{"temperature":0,"chat_template_kwargs":{"enable_thinking":false}}' +--num-requests 100 +''' + + [Tests.extra_env_vars] + UCX_LOG_LEVEL = "warn" + HF_HUB_OFFLINE = "0" + TRANSFORMERS_OFFLINE = "0" + HF_DATASETS_OFFLINE = "0" + DYNAMO_NODELIST = "$(scontrol show hostname $SLURM_JOB_NODELIST | tr -s '\\n' ',')" + UCX_TLS = "all" diff --git a/doc/USER_GUIDE.rst b/doc/USER_GUIDE.rst index 5ca678531..966fadb9f 100644 --- a/doc/USER_GUIDE.rst +++ b/doc/USER_GUIDE.rst @@ -206,6 +206,31 @@ action, typically seeded by ``random_seed``. Custom agents may extend the ``BaseAgentConfig`` and offer more parameters to configure. +DSE parameter exclusions +~~~~~~~~~~~~~~~~~~~~~~~~ + +CloudAI builds the DSE parameter space implicitly from list-valued fields under ``cmd_args``, list-valued +``extra_env_vars``, and list-valued ``num_nodes``. 
If a list-valued ``cmd_args`` field is configuration data rather than +a sweep dimension, exclude it with ``dse_excluded_args`` in the test or scenario definition. + +Entries in ``dse_excluded_args`` must be dot-separated paths that start with ``cmd_args.``. Each entry excludes that +field and any nested fields below it from DSE parameter discovery: + +.. code-block:: toml + + [[Tests]] + id = "Tests.1" + test_name = "my_test" + dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports"] + + [Tests.cmd_args.lmcache] + chunk_size = [256, 512] + lmcache_worker_ports = [8788, 8789, 8790, 8791] + +In this example, ``cmd_args.lmcache.chunk_size`` is still swept, while +``cmd_args.lmcache.lmcache_worker_ports`` is treated as a single configuration value. The exclusion mechanism currently +applies only to ``cmd_args`` paths; it does not exclude ``extra_env_vars`` or ``num_nodes`` from DSE. + Metric errors and report strategies ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/workloads/ai_dynamo.rst b/doc/workloads/ai_dynamo.rst index c00449681..7dbba92b6 100644 --- a/doc/workloads/ai_dynamo.rst +++ b/doc/workloads/ai_dynamo.rst @@ -110,6 +110,77 @@ To use genai-perf, set: output-tokens-mean = 500 request-count = 50 +Propagating LMCache Configuration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +AIDynamo can pass an LMCache YAML config to the worker processes by setting ``LMCACHE_CONFIG_FILE`` inside the +container. This only propagates the LMCache configuration; the vLLM/SGLang runtime still needs to be launched with the +appropriate LMCache or KV-transfer connector for that image/version. + +The preferred form is structured TOML under ``[cmd_args.lmcache]``. CloudAI converts that object to YAML in the +run output directory, mounts that directory as ``/cloudai_run_results``, and exports the generated file path as +``LMCACHE_CONFIG_FILE``: + +.. 
code-block:: toml + + [cmd_args] + [cmd_args.lmcache] + chunk_size = 256 + local_cpu = true + controller_pull_url = "{frontend_node}:8300" + controller_reply_url = "{frontend_node}:8400" + lmcache_worker_ports = [8788, 8789, 8790, 8791] + max_local_cpu_size = 6.0 + nixl_buffer_size = 2079377920 + nixl_buffer_device = "cpu" + + [cmd_args.lmcache.extra_config] + enable_nixl_storage = false + nixl_backend = "POSIX" + nixl_path = "{storage_cache_dir}" + nixl_pool_size = 2048 + +For an example that uses test-in-scenario mode, see +``conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml``. Because the test is fully defined inside the scenario, +``--tests-dir`` is not required when running that example: + +.. code-block:: bash + + uv run cloudai run --system-config path/to/system.toml \ + --test-scenario conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml + +The example sets ``dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports"]`` because +``lmcache_worker_ports`` is a list-valued LMCache setting, not a DSE sweep dimension. Other list-valued LMCache fields +can still be swept unless their ``cmd_args.`` path is also excluded. + +Alternatively, mount your own LMCache YAML file with ``extra_container_mounts`` and set ``LMCACHE_CONFIG_FILE`` through +``extra_env_vars``: + +.. code-block:: toml + + extra_container_mounts = ["/host/lmcache:/lmcache"] + extra_env_vars = { LMCACHE_CONFIG_FILE = "/lmcache/config.yaml" } + +For multi-node LMCache storage tests, any path referenced by the LMCache YAML, such as ``nixl_path`` for POSIX-backed +storage, must be visible and writable from every node that is expected to share cached data. A node-local path such as +``/tmp`` is suitable only for single-node smoke tests or configuration propagation checks. + +LMCache YAML values can use runtime placeholders. CloudAI renders them inside the Slurm job before launching workers: +``{frontend_node}``, ``{frontend_ip}``, ``{results_dir}``, and ``{storage_cache_dir}``. 
Unknown placeholders fail the +run before worker processes start. + +If the selected LMCache mode needs a controller, CloudAI can start one on the frontend node: + +.. code-block:: toml + + [cmd_args.lmcache_controller] + cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-ports {\"pull\":8300,\"reply\":8400}" + +This only launches the process. For disaggregated or multi-node runs, the LMCache YAML still needs controller addresses +that resolve to the frontend node from every worker. With the default controller monitor ports, use +``controller_pull_url = "{frontend_node}:8300"`` and ``controller_reply_url = "{frontend_node}:8400"``. The +``lmcache_worker_ports`` list must match the number of worker ranks. + Semantic Degradation With AIPerf Accuracy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -215,6 +286,7 @@ Supported Backends The following backends are available via the ``conf/experimental/ai_dynamo/test/`` directory: - **vLLM** (``vllm.toml``) — use with ``test_scenario/vllm_slurm.toml`` +- **vLLM with LMCache config propagation** — use self-contained scenario ``test_scenario/vllm_lmcache.toml`` - **sglang** (``sglang.toml``) — use with ``test_scenario/sglang_slurm.toml`` Both backends use ``aiperf`` as the default benchmark tool and support disaggregated prefill/decode. 
diff --git a/src/cloudai/_core/test_scenario.py b/src/cloudai/_core/test_scenario.py index 4c768158d..6c1d0ee3c 100644 --- a/src/cloudai/_core/test_scenario.py +++ b/src/cloudai/_core/test_scenario.py @@ -147,7 +147,11 @@ def param_space(self) -> dict[str, Any]: extra_env_vars_dict = self.test.extra_env_vars action_space: dict[str, Any] = { - **{key: value for key, value in cmd_args_dict.items() if isinstance(value, list)}, + **{ + key: value + for key, value in cmd_args_dict.items() + if isinstance(value, list) and not self.test.is_dse_excluded_arg(key) + }, **{f"extra_env_vars.{key}": value for key, value in extra_env_vars_dict.items() if isinstance(value, list)}, } if isinstance(self.num_nodes, list): @@ -183,8 +187,11 @@ def apply_params_set(self, action: dict[str, Any]) -> "TestRun": attrs = key.split(".") obj = tdef.cmd_args for attr in attrs[:-1]: - obj = getattr(obj, attr) - setattr(obj, attrs[-1], value) + obj = obj[attr] if isinstance(obj, dict) else getattr(obj, attr) + if isinstance(obj, dict): + obj[attrs[-1]] = value + else: + setattr(obj, attrs[-1], value) type(tdef)(**tdef.model_dump()) # trigger validation diff --git a/src/cloudai/models/scenario.py b/src/cloudai/models/scenario.py index beeb84244..57234df23 100644 --- a/src/cloudai/models/scenario.py +++ b/src/cloudai/models/scenario.py @@ -93,6 +93,7 @@ class TestRunModel(BaseModel): description: Optional[str] = None test_template_name: Optional[str] = None cmd_args: Optional[CmdArgs] = None + dse_excluded_args: Optional[list[str]] = None extra_env_vars: dict[str, str | list[str]] | None = None extra_container_mounts: Optional[list[str]] = None git_repos: Optional[list[GitRepo]] = None @@ -114,6 +115,7 @@ def tdef_model_dump(self, by_alias: bool) -> dict: "agent_metrics": self.agent_metrics if "agent_metrics" in self.model_fields_set else None, "agent_reward_function": self.agent_reward_function, "agent_config": self.agent_config, + "dse_excluded_args": self.dse_excluded_args, 
"extra_container_mounts": self.extra_container_mounts, "extra_env_vars": self.extra_env_vars if self.extra_env_vars else None, "cmd_args": self.cmd_args.model_dump(by_alias=by_alias) if self.cmd_args else None, diff --git a/src/cloudai/models/workload.py b/src/cloudai/models/workload.py index 34965454a..8b981d8ea 100644 --- a/src/cloudai/models/workload.py +++ b/src/cloudai/models/workload.py @@ -93,11 +93,11 @@ class TestDefinition(BaseModel, ABC): __test__ = False model_config = ConfigDict(extra="forbid") - name: str description: str test_template_name: str cmd_args: Any + dse_excluded_args: list[str] = Field(default_factory=list) extra_env_vars: dict[str, Union[str, List[str]]] = {} extra_cmd_args: dict[str, str] = {} extra_container_mounts: list[str] = [] @@ -115,6 +115,11 @@ class TestDefinition(BaseModel, ABC): def cmd_args_dict(self) -> Dict[str, Union[str, List[str]]]: return self.cmd_args.model_dump() + def is_dse_excluded_arg(self, path: str) -> bool: + """Return whether a dot-separated cmd_args path should be ignored by DSE.""" + path = f"cmd_args.{path}" + return any(path == excluded or path.startswith(f"{excluded}.") for excluded in self.dse_excluded_args) + @property def extra_args_str(self) -> str: parts = [] @@ -131,15 +136,41 @@ def constraint_check(self, tr: TestRun, system: Optional[System]) -> bool: @property def is_dse_job(self) -> bool: - def check_dict(d: dict) -> bool: + def check_dict(d: dict, parent_key: str = "") -> bool: if isinstance(d, dict): - for value in d.values(): - if isinstance(value, list) or (isinstance(value, dict) and check_dict(value)): + for key, value in d.items(): + path = f"{parent_key}.{key}" if parent_key else key + if self.is_dse_excluded_arg(path): + continue + if isinstance(value, list) or (isinstance(value, dict) and check_dict(value, path)): return True return False return check_dict(self.cmd_args_dict) or check_dict(self.extra_env_vars) + @field_validator("dse_excluded_args", mode="before") + @classmethod + def 
normalize_dse_excluded_args(cls, value: Any) -> list[str]: + if value is None: + return [] + if isinstance(value, str): + value = [value] + + normalized = [] + for prefix in value: + if not isinstance(prefix, str): + raise ValueError("DSE excluded cmd_args prefixes must be strings.") + + prefix = prefix.strip() + if not prefix.startswith("cmd_args."): + raise ValueError(f"DSE excluded arg must start with 'cmd_args.': {prefix!r}") + if prefix == "cmd_args." or prefix.endswith(".") or ".." in prefix: + raise ValueError(f"Invalid DSE excluded cmd_args prefix: {prefix!r}") + + normalized.append(prefix) + + return normalized + def was_run_successful(self, tr: TestRun) -> JobStatusResult: return JobStatusResult(is_successful=True) diff --git a/src/cloudai/workloads/ai_dynamo/__init__.py b/src/cloudai/workloads/ai_dynamo/__init__.py index 4aac3fd2c..5e430068d 100644 --- a/src/cloudai/workloads/ai_dynamo/__init__.py +++ b/src/cloudai/workloads/ai_dynamo/__init__.py @@ -15,14 +15,15 @@ # limitations under the License. 
from .ai_dynamo import ( + LMCACHE_CONFIG_BACKUP_FILE_NAME, + LMCACHE_CONFIG_FILE_NAME, AIDynamoArgs, AIDynamoCmdArgs, AIDynamoTestDefinition, AIPerf, AIPerfAccuracy, GenAIPerf, - LMCache, - LMCacheArgs, + LMCacheController, WorkerBaseArgs, WorkerConfig, ) @@ -31,6 +32,8 @@ from .slurm_command_gen_strategy import AIDynamoSlurmCommandGenStrategy __all__ = [ + "LMCACHE_CONFIG_BACKUP_FILE_NAME", + "LMCACHE_CONFIG_FILE_NAME", "AIDynamoArgs", "AIDynamoCmdArgs", "AIDynamoKubernetesJsonGenStrategy", @@ -40,8 +43,7 @@ "AIPerf", "AIPerfAccuracy", "GenAIPerf", - "LMCache", - "LMCacheArgs", + "LMCacheController", "WorkerBaseArgs", "WorkerConfig", ] diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py index 35da5b782..7f8da4165 100644 --- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py +++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py @@ -44,6 +44,8 @@ AIPERF_ARTIFACTS_DIR = "aiperf_artifacts" AIPERF_ACCURACY_ARTIFACTS_DIR = "aiperf_accuracy_artifacts" AIPERF_ACCURACY_RESULTS_CSV = "accuracy_results.csv" +LMCACHE_CONFIG_FILE_NAME = "lmcache-config.yaml" +LMCACHE_CONFIG_BACKUP_FILE_NAME = "lmcache-config.original.yaml" class Args(BaseModel): @@ -230,49 +232,6 @@ def populate_prefill_decode_args(self) -> "AIDynamoArgs": return self -class LMCacheArgs(BaseModel): - """Arguments for LMCache.""" - - model_config = ConfigDict(extra="allow") - - chunk_size: int = 256 - local_cpu: bool = False - nixl_buffer_size: int = 10737418240 - nixl_buffer_device: str = "cuda" - extra_config_enable_nixl_storage: bool = True - extra_config_nixl_backend: str = "GDS_MT" - extra_config_nixl_file_pool_size: int = 64 - - # LMCache controller configuration - enable_controller: bool = True - lmcache_instance_id: str = "lmcache_default_instance" - controller_url: str = "localhost:9001" - lmcache_worker_port: int = 8788 - distributed_url: str = "localhost:8789" - - -class LMCache(BaseModel): - """LMCache configuration.""" - - model_config = 
ConfigDict(extra="forbid") - - controller_cmd: str = "lmcache_controller --host localhost --port 9000 --monitor-port 9001" - repo: GitRepo = GitRepo( - url="https://github.com/LMCache/LMCache.git", commit="ab8530993992db873869ba882320953582d94309" - ) - - args: LMCacheArgs = Field(default_factory=LMCacheArgs) - extra_args: str | list[str] | None = Field( - default=None, - serialization_alias="extra-args", - validation_alias=AliasChoices("extra-args", "extra_args"), - ) - - @property - def installables(self) -> list[Installable]: - return [self.repo] - - class GenAIPerf(Workload): """Workload configuration for GenAI performance profiling.""" @@ -345,6 +304,14 @@ class Constraints(BaseModel): tp_times_pp_le_gpus_per_node: bool = True +class LMCacheController(BaseModel): + """Optional LMCache controller process to launch on the frontend node.""" + + model_config = ConfigDict(extra="forbid") + + cmd: str + + class AIDynamoCmdArgs(CmdArgs): """Arguments for AI Dynamo.""" @@ -353,7 +320,8 @@ class AIDynamoCmdArgs(CmdArgs): docker_image_url: str storage_cache_dir: Optional[str | list[str]] = Field(default="/tmp", serialization_alias="storage_cache_dir") dynamo: AIDynamoArgs - lmcache: LMCache = Field(default_factory=LMCache) + lmcache: dict | None = None + lmcache_controller: LMCacheController | None = None genai_perf: GenAIPerf = Field(default_factory=GenAIPerf) aiperf: AIPerf = Field(default_factory=AIPerf) aiperf_accuracy: AIPerfAccuracy | None = None @@ -376,7 +344,6 @@ def workloads_list(self) -> list[str]: @property def installables(self) -> list[Installable]: return [ - *self.lmcache.installables, *self.genai_perf.installables, *self.aiperf.installables, *(self.aiperf_accuracy.installables if self.aiperf_accuracy else []), @@ -387,7 +354,6 @@ class AIDynamoTestDefinition(TestDefinition): """Test definition for AI Dynamo.""" model_config = ConfigDict(extra="forbid") - cmd_args: AIDynamoCmdArgs _docker_image: Optional[DockerImage] = None script: File = 
File(Path(__file__).parent.parent / "ai_dynamo/ai_dynamo.sh") diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh b/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh index 5b65db41f..52e975850 100644 --- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh +++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh @@ -31,8 +31,6 @@ declare -A prefill_config declare -A prefill_args declare -A decode_config declare -A decode_args -declare -A lmcache_args -declare -A lmcache_config declare -A genai_perf_args declare -A genai_perf_config declare -A aiperf_args @@ -40,6 +38,8 @@ declare -A aiperf_config declare -A aiperf_accuracy_args declare -A aiperf_accuracy_config +lmcache_controller_cmd="" + declare -A dynamo_args dynamo_args["backend"]="vllm" dynamo_args["node-setup-cmd"]="" @@ -54,7 +54,7 @@ dynamo_args["frontend-node"]="" dynamo_args["etcd-cmd"]="etcd --log-level debug" dynamo_args["nats-cmd"]="nats-server -js" -dynamo_args["worker-error-pattern"]="zmq.error.ZMQError:.Address.already.in.use|ERROR.core.run_engine_core:.EngineCore.failed.to.start|ERROR.multiproc_executor.worker_busy_loop:.WorkerProc.hit.an.exception|ValueError:.a.python.*async.generator:.EngineDeadError:.EngineCore.encountered.an.issue|ZeroDivisionError:.integer.division.or.modulo.by.zero|ERROR.core.run_engine_core:.EngineCore.encountered.a.fatal.error|Exception:.Failed.to.fetch.model|ERROR.*Engine.core.proc.EngineCore_.*died.unexpectedly|RuntimeError:.Engine.core.initialization.failed." 
+dynamo_args["worker-error-pattern"]="zmq.error.ZMQError:.Address.already.in.use|ERROR.core.run_engine_core:.EngineCore.failed.to.start|ERROR.multiproc_executor.worker_busy_loop:.WorkerProc.hit.an.exception|ValueError:.a.python.*async.generator:.EngineDeadError:.EngineCore.encountered.an.issue|ZeroDivisionError:.integer.division.or.modulo.by.zero|ERROR.core.run_engine_core:.EngineCore.encountered.a.fatal.error|Exception:.Failed.to.fetch.model|ERROR.*Engine.core.proc.EngineCore_.*died.unexpectedly|RuntimeError:.Engine.core.initialization.failed.|pydantic_core._pydantic_core.ValidationError|Unsupported.connector.type" # sglang_dsr1-specific optional ports. Ignored by vllm. dynamo_args["sgl-http-port"]=9001 @@ -163,10 +163,8 @@ _parse_cli_pairs() { decode_args["--${key#--decode-args-}"]="$2" ;; --decode-*) decode_config["${key#--decode-}"]="$2" ;; - --lmcache-args-*) - lmcache_args["${key#--lmcache-args-}"]="$2" ;; - --lmcache-*) - lmcache_config["${key#--lmcache-}"]="$2" ;; + --lmcache-controller-cmd) + lmcache_controller_cmd="$2" ;; --genai_perf-args-*) genai_perf_args["--${key#--genai_perf-args-}"]="$2" ;; --genai_perf-*) @@ -267,7 +265,7 @@ _has_connector() { } _apply_connector_settings() { - if _has_connector "lmcache"; then + if _has_connector "lmcache" || [[ -n "${LMCACHE_CONFIG_FILE:-}" ]]; then export ENABLE_LMCACHE=1 fi if _has_connector "kvbm"; then @@ -365,8 +363,8 @@ _dump_args() { log "Prefill args:\n$(arg_array_to_string prefill_args)" log "Decode config params:\n$(arg_array_to_string decode_config)" log "Decode args:\n$(arg_array_to_string decode_args)" - log "LMCache config params:\n$(arg_array_to_string lmcache_config)" - log "LMCache args:\n$(arg_array_to_string lmcache_args)" + log "LMCache config file: ${LMCACHE_CONFIG_FILE:-}" + log "LMCache controller cmd: ${lmcache_controller_cmd}" log "GenAI config params:\n$(arg_array_to_string genai_perf_config)" log "GenAI-Perf args:\n$(arg_array_to_string genai_perf_args)" log "AIPerf config 
params:\n$(arg_array_to_string aiperf_config)" @@ -854,16 +852,6 @@ function launch_prefill() done } -function launch_lmcache_controller() -{ - if ! _has_connector "lmcache"; then - return - fi - - log "Launching LMCache controller with cmd: ${lmcache_config["controller_cmd"]}" - ${lmcache_config["controller_cmd"]} > ${RESULTS_DIR}/lmcache_controller.log 2>&1 -} - function wait_for_dynamo_frontend() { local want_prefill=$(_expected_ready_prefill) @@ -945,6 +933,11 @@ function setup_storage_cache_dir() chmod 755 "${STORAGE_CACHE_DIR}" } +function lmcache_storage_cache_dir() +{ + echo "${STORAGE_CACHE_DIR_BASE}/${TEST_USER}/${dynamo_args["frontend-node"]}/lmcache/cache" +} + function setup_kvbm() { if ! _has_connector "kvbm"; then @@ -958,47 +951,84 @@ function setup_kvbm() setup_cufile } -function setup_lmcache() +function render_lmcache_config() { - if ! _has_connector "lmcache"; then - log "Connector list does not include lmcache. Skipping setup_lmcache" + if [[ -z "${LMCACHE_CONFIG_FILE:-}" ]]; then return fi - _require_cmd uv - local lmcache_path="${lmcache_config["repo"]}" - log "Setting up LMCache; installing LMCache using: uv pip install $lmcache_path" - uv pip install -e "$lmcache_path" - - setup_storage_cache_dir "lmcache" + if [[ ! -f "${LMCACHE_CONFIG_FILE}" ]]; then + log "ERROR: LMCACHE_CONFIG_FILE does not exist: ${LMCACHE_CONFIG_FILE}" + exit 1 + fi - export LMCACHE_CONFIG_FILE=$RESULTS_DIR/lmcache-nixl-config.yaml - rm -f $LMCACHE_CONFIG_FILE + _require_cmd python3 - lmcache_args["extra_config_nixl_path"]="$STORAGE_CACHE_DIR" + local frontend_node="${dynamo_args["frontend-node"]}" + local frontend_ip="$(_resolve_host_ip "$frontend_node")" + local storage_cache_dir="$(lmcache_storage_cache_dir)" + mkdir -p "$storage_cache_dir" + chmod 755 "$storage_cache_dir" + + local rendered_config="${LMCACHE_CONFIG_FILE}.tmp.${SLURM_NODEID:-0}" + if ! 
FRONTEND_NODE="$frontend_node" \ + FRONTEND_IP="$frontend_ip" \ + RESULTS_DIR="$RESULTS_DIR" \ + STORAGE_CACHE_DIR="$storage_cache_dir" \ + python3 - "$LMCACHE_CONFIG_FILE" "$rendered_config" <<'PY' +import os +import re +import sys +from pathlib import Path + +src, dst = sys.argv[1], sys.argv[2] +values = { + "frontend_node": os.environ["FRONTEND_NODE"], + "frontend_ip": os.environ["FRONTEND_IP"], + "results_dir": os.environ["RESULTS_DIR"], + "storage_cache_dir": os.environ["STORAGE_CACHE_DIR"], +} + +content = Path(src).read_text() +unknown = sorted(set(re.findall(r"\{([A-Za-z_][A-Za-z0-9_]*)\}", content)) - values.keys()) +if unknown: + raise SystemExit(f"Unknown LMCache config placeholders: {', '.join(unknown)}") + +for key, value in values.items(): + content = content.replace("{" + key + "}", value) + +Path(dst).write_text(content) +PY + then + log "ERROR: Failed to render LMCache config template: ${LMCACHE_CONFIG_FILE}" + exit 1 + fi - for key in "${!lmcache_args[@]}"; do - shopt -s nocasematch - if [[ "$key" == "extra_config"* ]]; then - continue - fi + mv "$rendered_config" "$LMCACHE_CONFIG_FILE" + log "Rendered LMCache config file: ${LMCACHE_CONFIG_FILE}" +} - val="${lmcache_args[$key]}" - echo "$key: $val" >> $LMCACHE_CONFIG_FILE - done +function setup_lmcache() +{ + if [[ -z "${LMCACHE_CONFIG_FILE:-}" ]]; then + log "LMCACHE_CONFIG_FILE is not set. 
Skipping setup_lmcache" + return + fi - echo "extra_config:" >> $LMCACHE_CONFIG_FILE - for key in "${!lmcache_args[@]}"; do - shopt -s nocasematch - if [[ "$key" == "extra_config"* ]]; then - nkey="${key#extra_config_}" - val="${lmcache_args[$key]}" - echo " $nkey: $val" >> $LMCACHE_CONFIG_FILE - fi - done + log "Using LMCache config file: ${LMCACHE_CONFIG_FILE}" setup_cufile } +function launch_lmcache_controller() +{ + if [[ -z "${lmcache_controller_cmd}" ]]; then + return + fi + + log "Launching LMCache controller with cmd: ${lmcache_controller_cmd}" + ${lmcache_controller_cmd} > "${RESULTS_DIR}/lmcache_controller.log" 2>&1 +} + function log_gpu_utilization() { # Check if nvidia-smi is available @@ -1101,6 +1131,8 @@ function main() cd "$RESULTS_DIR" || { log "ERROR: Failed to cd to $RESULTS_DIR"; exit 1; } + render_lmcache_config + log_gpu_utilization & if _is_frontend_node; then @@ -1108,6 +1140,7 @@ function main() log_node_role "$(_current_node_name)" "frontend" setup_lmcache setup_kvbm + launch_lmcache_controller & launch_etcd & launch_nats & wait_for_etcd @@ -1130,8 +1163,6 @@ function main() fi if _is_frontend_node; then - launch_lmcache_controller & - sleep 10 launch_workloads & diff --git a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py index c1a817853..861a4c469 100644 --- a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py +++ b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py @@ -19,32 +19,45 @@ from pathlib import Path from typing import List, cast +import yaml from pydantic import BaseModel, TypeAdapter, ValidationError from cloudai.core import File, GitRepo from cloudai.systems.slurm import SlurmCommandGenStrategy -from .ai_dynamo import AIDynamoTestDefinition +from .ai_dynamo import LMCACHE_CONFIG_BACKUP_FILE_NAME, LMCACHE_CONFIG_FILE_NAME, AIDynamoTestDefinition class AIDynamoSlurmCommandGenStrategy(SlurmCommandGenStrategy): """Command 
generation strategy for AI Dynamo on Slurm systems.""" - def _container_mounts(self) -> list[str]: - td = cast(AIDynamoTestDefinition, self.test_run.test) + @property + def td(self) -> AIDynamoTestDefinition: + return cast(AIDynamoTestDefinition, self.test_run.test) + def _container_mounts(self) -> list[str]: result = [f"{self.system.hf_home_path.absolute()}:{self.CONTAINER_MOUNT_HF_HOME}"] - logging.info(f"storage_cache_dir: {td.cmd_args.storage_cache_dir}") - if td.cmd_args.storage_cache_dir: - result.append(f"{td.cmd_args.storage_cache_dir}:{td.cmd_args.storage_cache_dir}") + logging.info(f"storage_cache_dir: {self.td.cmd_args.storage_cache_dir}") + if self.td.cmd_args.storage_cache_dir: + result.append(f"{self.td.cmd_args.storage_cache_dir}:{self.td.cmd_args.storage_cache_dir}") return result + @property + def final_env_vars(self) -> dict[str, str | list[str]]: + env_vars = super().final_env_vars + if self.td.cmd_args.lmcache is not None: + env_vars["LMCACHE_CONFIG_FILE"] = f"{self.CONTAINER_MOUNT_OUTPUT}/{LMCACHE_CONFIG_FILE_NAME}" + return env_vars + + @final_env_vars.setter + def final_env_vars(self, value: dict[str, str | list[str]]) -> None: + self._final_env_vars = value + def image_path(self) -> str | None: - tdef: AIDynamoTestDefinition = cast(AIDynamoTestDefinition, self.test_run.test) - if tdef.docker_image and tdef.docker_image.installed_path: - return str(tdef.docker_image.installed_path) + if self.td.docker_image and self.td.docker_image.installed_path: + return str(self.td.docker_image.installed_path) return None def _get_toml_args(self, base_model: BaseModel, prefix: str, exclude: List[str] | None = None) -> List[str]: @@ -87,8 +100,19 @@ def _get_nested_toml_args(self, base_model: BaseModel, prefix: str) -> List[str] return result + def _prepare_lmcache_config(self): + if self.td.cmd_args.lmcache is None: + return + + self.test_run.output_path.mkdir(parents=True, exist_ok=True) + config = yaml.safe_dump(self.td.cmd_args.lmcache, sort_keys=False) 
+ (self.test_run.output_path / LMCACHE_CONFIG_FILE_NAME).write_text(config) + (self.test_run.output_path / LMCACHE_CONFIG_BACKUP_FILE_NAME).write_text(config) + def _gen_script_args(self, td: AIDynamoTestDefinition) -> List[str]: - assert td.repo.installed_path + self._prepare_lmcache_config() + if not td.repo.installed_path: + raise ValueError("Dynamo repo is not installed") args = [ "--user $USER", f"--install-dir {self.CONTAINER_MOUNT_INSTALL}", @@ -102,6 +126,8 @@ def _gen_script_args(self, td: AIDynamoTestDefinition) -> List[str]: if td.cmd_args.storage_cache_dir: args.append(f"--storage-cache-dir {td.cmd_args.storage_cache_dir}") + if td.cmd_args.lmcache_controller: + args.append(f"--lmcache-controller-cmd {shlex.quote(td.cmd_args.lmcache_controller.cmd)}") args.extend( self._get_toml_args( @@ -118,7 +144,6 @@ def _gen_script_args(self, td: AIDynamoTestDefinition) -> List[str]: args.extend(self._get_nested_toml_args(td.cmd_args.dynamo.prefill_worker, "--prefill-")) args.extend(self._get_nested_toml_args(td.cmd_args.dynamo.decode_worker, "--decode-")) - args.extend(self._get_nested_toml_args(td.cmd_args.lmcache, "--lmcache-")) args.extend(self._get_nested_toml_args(td.cmd_args.genai_perf, "--genai_perf-")) args.extend(self._get_nested_toml_args(td.cmd_args.aiperf, "--aiperf-")) if td.cmd_args.aiperf_accuracy is not None: @@ -127,7 +152,6 @@ def _gen_script_args(self, td: AIDynamoTestDefinition) -> List[str]: return args def _gen_srun_command(self) -> str: - td = cast(AIDynamoTestDefinition, self.test_run.test) num_nodes, node_list = self.get_cached_nodes_spec() out_dir = str(self.test_run.output_path.absolute()) @@ -142,10 +166,10 @@ def _gen_srun_command(self) -> str: f"--output={out_dir}/node-%n-stdout.txt", f"--error={out_dir}/node-%n-stderr.txt", "bash", - f"{self.CONTAINER_MOUNT_INSTALL}/{td.script.src.name}", + f"{self.CONTAINER_MOUNT_INSTALL}/{self.td.script.src.name}", ] ) - srun_cmd.extend(self._gen_script_args(td)) + 
srun_cmd.extend(self._gen_script_args(self.td)) return " \\\n ".join(srun_cmd) + "\n" def _validate_worker_nodes( @@ -184,13 +208,12 @@ def get_cached_nodes_spec(self) -> tuple[int, list[str]]: if cache_key in self._node_spec_cache: return self._node_spec_cache[cache_key] - td = cast(AIDynamoTestDefinition, self.test_run.test) prefill_n, prefill_nodes = 0, "" - if td.cmd_args.dynamo.prefill_worker: - prefill_n = cast(int, td.cmd_args.dynamo.prefill_worker.num_nodes) - prefill_nodes = td.cmd_args.dynamo.prefill_worker.nodes - decode_n = td.cmd_args.dynamo.decode_worker.num_nodes - decode_nodes = td.cmd_args.dynamo.decode_worker.nodes + if self.td.cmd_args.dynamo.prefill_worker: + prefill_n = cast(int, self.td.cmd_args.dynamo.prefill_worker.num_nodes) + prefill_nodes = self.td.cmd_args.dynamo.prefill_worker.nodes + decode_n = self.td.cmd_args.dynamo.decode_worker.num_nodes + decode_nodes = self.td.cmd_args.dynamo.decode_worker.nodes assert isinstance(prefill_n, int), "prefill_worker.num_nodes must be an integer" assert isinstance(decode_n, int), "decode_worker.num_nodes must be an integer" diff --git a/tests/ref_data/ai-dynamo.sbatch b/tests/ref_data/ai-dynamo.sbatch index 492e3c427..865444b81 100644 --- a/tests/ref_data/ai-dynamo.sbatch +++ b/tests/ref_data/ai-dynamo.sbatch @@ -61,20 +61,6 @@ srun \ --decode-args-model "model" \ --decode-args-pipeline-parallel-size "1" \ --decode-args-tensor-parallel-size "1" \ - --lmcache-controller_cmd "lmcache_controller --host localhost --port 9000 --monitor-port 9001" \ - --lmcache-repo "/cloudai_install/LMCache__ab8530993992db873869ba882320953582d94309" \ - --lmcache-args-chunk_size "256" \ - --lmcache-args-local_cpu "False" \ - --lmcache-args-nixl_buffer_size "10737418240" \ - --lmcache-args-nixl_buffer_device "cuda" \ - --lmcache-args-extra_config_enable_nixl_storage "True" \ - --lmcache-args-extra_config_nixl_backend "GDS_MT" \ - --lmcache-args-extra_config_nixl_file_pool_size "64" \ - --lmcache-args-enable_controller 
"True" \ - --lmcache-args-lmcache_instance_id "lmcache_default_instance" \ - --lmcache-args-controller_url "localhost:9001" \ - --lmcache-args-lmcache_worker_port "8788" \ - --lmcache-args-distributed_url "localhost:8789" \ --genai_perf-name "genai_perf" \ --genai_perf-cmd "genai-perf profile" \ --genai_perf-script "/cloudai_install/genai_perf.sh" \ @@ -89,4 +75,4 @@ srun \ --aiperf-name "aiperf" \ --aiperf-cmd "aiperf profile" \ --aiperf-script "/cloudai_install/aiperf.sh" \ - --aiperf-report-name "aiperf_report.csv" \ No newline at end of file + --aiperf-report-name "aiperf_report.csv" diff --git a/tests/test_acceptance.py b/tests/test_acceptance.py index cf603b767..d45416595 100644 --- a/tests/test_acceptance.py +++ b/tests/test_acceptance.py @@ -35,8 +35,6 @@ AIDynamoCmdArgs, AIDynamoTestDefinition, GenAIPerf, - LMCache, - LMCacheArgs, WorkerBaseArgs, WorkerConfig, ) @@ -528,14 +526,6 @@ def test_req(request, slurm_system: SlurmSystem, partial_tr: partial[TestRun]) - "warmup-request-count": 10, } ), - lmcache=LMCache( - args=LMCacheArgs(), - repo=GitRepo( - url="https://github.com/LMCache/LMCache.git", - commit="ab8530993992db873869ba882320953582d94309", - installed_path=slurm_system.install_path, - ), - ), ), ), ), diff --git a/tests/test_cloudaigym.py b/tests/test_cloudaigym.py index ecb9eb0a5..fdda413e4 100644 --- a/tests/test_cloudaigym.py +++ b/tests/test_cloudaigym.py @@ -213,6 +213,22 @@ def test_action_space(nemorun: NeMoRunTestDefinition, setup_env: tuple[TestRun, assert action_space["NUM_NODES"] == tr.num_nodes +def test_action_space_excludes_configured_cmd_arg_prefix( + nemorun: NeMoRunTestDefinition, setup_env: tuple[TestRun, BaseRunner] +): + tr, _ = setup_env + nemorun.cmd_args.trainer = Trainer( + max_steps=[1000, 2000], strategy=TrainerStrategy(tensor_model_parallel_size=[1, 2]) + ) + nemorun.dse_excluded_args = ["cmd_args.trainer.strategy"] + tr.test = nemorun + + action_space = tr.param_space + + assert action_space["trainer.max_steps"] == 
[1000, 2000] + assert "trainer.strategy.tensor_model_parallel_size" not in action_space + + @pytest.mark.parametrize("num_nodes", (1, [1, 2], [3])) def test_all_combinations(nemorun: NeMoRunTestDefinition, setup_env: tuple[TestRun, BaseRunner], num_nodes: int): tr, _ = setup_env diff --git a/tests/test_test_scenario.py b/tests/test_test_scenario.py index 8b84a01db..8b390e8a1 100644 --- a/tests/test_test_scenario.py +++ b/tests/test_test_scenario.py @@ -21,7 +21,7 @@ import pytest import toml -from cloudai._core.exceptions import MissingTestError +from cloudai._core.exceptions import MissingTestError, TestConfigParsingError from cloudai.core import ( CmdArgs, GitRepo, @@ -615,6 +615,62 @@ def test_agent_config_is_merged_with_scenario_override( "start_action": "random", } + def test_dse_excluded_args_can_be_set_from_scenario_toml( + self, test_scenario_parser: TestScenarioParser, slurm_system: SlurmSystem + ): + test_scenario_parser.test_mapping = { + "nccl": NCCLTestDefinition( + name="nccl", + description="desc", + test_template_name="NcclTest", + cmd_args=NCCLCmdArgs(docker_image_url="fake://url/nccl"), + ) + } + model = TestScenarioModel.model_validate( + toml.loads( + """ + name = "test" + + [[Tests]] + id = "1" + test_name = "nccl" + dse_excluded_args = ["cmd_args.foo", "cmd_args.bar.baz"] + """ + ) + ) + tdef = test_scenario_parser._prepare_tdef(model.tests[0]) + + assert tdef.dse_excluded_args == ["cmd_args.foo", "cmd_args.bar.baz"] + + def test_dse_excluded_args_must_use_cmd_args_prefix( + self, test_scenario_parser: TestScenarioParser, slurm_system: SlurmSystem + ): + test_scenario_parser.test_mapping = { + "nccl": NCCLTestDefinition( + name="nccl", + description="desc", + test_template_name="NcclTest", + cmd_args=NCCLCmdArgs(docker_image_url="fake://url/nccl"), + ) + } + model = TestScenarioModel.model_validate( + toml.loads( + """ + name = "test" + + [[Tests]] + id = "1" + test_name = "nccl" + dse_excluded_args = ["foo"] + """ + ) + ) + + with 
pytest.raises(TestConfigParsingError) as excinfo: + test_scenario_parser._prepare_tdef(model.tests[0]) + + assert "DSE excluded arg must start with 'cmd_args.'" in str(excinfo.value.__cause__) + class TestReporters: def test_default(self): diff --git a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py index 7b036b5a8..544bb064a 100644 --- a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py +++ b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py @@ -19,11 +19,14 @@ from typing import cast import pytest +import yaml from cloudai._core.test_scenario import TestRun from cloudai.core import GitRepo from cloudai.systems.slurm import SlurmSystem from cloudai.workloads.ai_dynamo import ( + LMCACHE_CONFIG_BACKUP_FILE_NAME, + LMCACHE_CONFIG_FILE_NAME, AIDynamoArgs, AIDynamoCmdArgs, AIDynamoSlurmCommandGenStrategy, @@ -31,8 +34,7 @@ AIPerf, AIPerfAccuracy, GenAIPerf, - LMCache, - LMCacheArgs, + LMCacheController, WorkerBaseArgs, WorkerConfig, ) @@ -87,7 +89,6 @@ def cmd_args() -> AIDynamoCmdArgs: "request-count": 10, } ), - lmcache=LMCache(args=LMCacheArgs()), ) @@ -227,3 +228,67 @@ def test_gen_script_args_quotes_worker_json_args(strategy: AIDynamoSlurmCommandG assert f"--prefill-args-kv-transfer-config '{config}'" in result assert f"--decode-args-kv-transfer-config '{config}'" in result + + +def test_gen_script_args_writes_lmcache_object_as_yaml(strategy: AIDynamoSlurmCommandGenStrategy) -> None: + td = cast(AIDynamoTestDefinition, strategy.test_run.test) + td.cmd_args.lmcache = { + "chunk_size": 512, + "local_cpu": True, + "controller_pull_url": "{frontend_node}:8300", + "controller_reply_url": "{frontend_node}:8400", + "lmcache_worker_ports": [8788, 8789, 8790, 8791], + "extra_config": { + "enable_nixl_storage": False, + "nixl_backend": "POSIX", + "nixl_path": "{storage_cache_dir}", + }, + } + + result = strategy._gen_script_args(td) + + config_path = 
strategy.test_run.output_path / LMCACHE_CONFIG_FILE_NAME + backup_path = strategy.test_run.output_path / LMCACHE_CONFIG_BACKUP_FILE_NAME + config = yaml.safe_load(config_path.read_text()) + backup_config = yaml.safe_load(backup_path.read_text()) + assert ( + strategy.final_env_vars["LMCACHE_CONFIG_FILE"] + == f"{strategy.CONTAINER_MOUNT_OUTPUT}/{LMCACHE_CONFIG_FILE_NAME}" + ) + assert config["chunk_size"] == 512 + assert config["local_cpu"] is True + assert config["controller_pull_url"] == "{frontend_node}:8300" + assert config["controller_reply_url"] == "{frontend_node}:8400" + assert config["lmcache_worker_ports"] == [8788, 8789, 8790, 8791] + assert config["extra_config"]["enable_nixl_storage"] is False + assert config["extra_config"]["nixl_backend"] == "POSIX" + assert config["extra_config"]["nixl_path"] == "{storage_cache_dir}" + assert backup_config == config + assert "--lmcache" not in result + + +def test_lmcache_config_supports_dse_with_excluded_prefix(test_run: TestRun) -> None: + td = cast(AIDynamoTestDefinition, test_run.test) + td.dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports"] + td.cmd_args.lmcache = { + "chunk_size": [256, 512], + "lmcache_worker_ports": [8788, 8789, 8790, 8791], + } + + assert test_run.is_dse_job is True + assert test_run.param_space["lmcache.chunk_size"] == [256, 512] + assert "lmcache.lmcache_worker_ports" not in test_run.param_space + + new_test_run = test_run.apply_params_set({"lmcache.chunk_size": 512}) + + assert cast(AIDynamoTestDefinition, new_test_run.test).cmd_args.lmcache["chunk_size"] == 512 # type: ignore + + +def test_gen_script_args_passes_lmcache_controller_cmd(strategy: AIDynamoSlurmCommandGenStrategy) -> None: + td = cast(AIDynamoTestDefinition, strategy.test_run.test) + cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-port 9001" + td.cmd_args.lmcache_controller = LMCacheController(cmd=cmd) + + result = strategy._gen_script_args(td) + + assert f"--lmcache-controller-cmd 
{shlex.quote(cmd)}" in result diff --git a/tests/workloads/ai_dynamo/test_json_gen_strategy_kubernetes.py b/tests/workloads/ai_dynamo/test_json_gen_strategy_kubernetes.py index 569978f49..b3120f5b9 100644 --- a/tests/workloads/ai_dynamo/test_json_gen_strategy_kubernetes.py +++ b/tests/workloads/ai_dynamo/test_json_gen_strategy_kubernetes.py @@ -29,8 +29,6 @@ AIDynamoKubernetesJsonGenStrategy, AIDynamoTestDefinition, GenAIPerf, - LMCache, - LMCacheArgs, WorkerBaseArgs, WorkerConfig, ) @@ -54,7 +52,6 @@ def dynamo(request: Any) -> AIDynamoTestDefinition: ) ), genai_perf=GenAIPerf(), - lmcache=LMCache(args=LMCacheArgs()), ), ) if request.param == "disagg": diff --git a/tests/workloads/ai_dynamo/test_report_gen_strategy.py b/tests/workloads/ai_dynamo/test_report_gen_strategy.py index 47e214421..f81e9a2dd 100644 --- a/tests/workloads/ai_dynamo/test_report_gen_strategy.py +++ b/tests/workloads/ai_dynamo/test_report_gen_strategy.py @@ -28,8 +28,6 @@ AIPerf, AIPerfAccuracy, GenAIPerf, - LMCache, - LMCacheArgs, WorkerBaseArgs, WorkerConfig, ) @@ -89,7 +87,6 @@ def ai_dynamo_tr(tmp_path: Path) -> TestRun: ), ), genai_perf=GenAIPerf(), - lmcache=LMCache(args=LMCacheArgs()), ), ) tr = TestRun(name="ai_dynamo", test=test, num_nodes=1, nodes=[], output_path=tmp_path) @@ -121,7 +118,6 @@ def ai_dynamo_aiperf_tr(tmp_path: Path) -> TestRun: ), ), aiperf=AIPerf(), - lmcache=LMCache(args=LMCacheArgs()), ), ) tr = TestRun(name="ai_dynamo_aiperf", test=test, num_nodes=1, nodes=[], output_path=tmp_path) @@ -148,7 +144,6 @@ def ai_dynamo_aiperf_with_split_accuracy_tr(tmp_path: Path) -> TestRun: ), aiperf=AIPerf(), aiperf_accuracy=AIPerfAccuracy.model_validate({"cli": get_aiperf_accuracy_cli()}), - lmcache=LMCache(args=LMCacheArgs()), ), ) tr = TestRun(name="ai_dynamo_aiperf_with_split_accuracy", test=test, num_nodes=1, nodes=[], output_path=tmp_path) @@ -176,7 +171,6 @@ def ai_dynamo_genai_perf_with_split_accuracy_tr(tmp_path: Path) -> TestRun: ), genai_perf=GenAIPerf(), 
aiperf_accuracy=AIPerfAccuracy.model_validate({"cli": get_aiperf_accuracy_cli()}), - lmcache=LMCache(args=LMCacheArgs()), ), ) tr = TestRun(