From 3cfc9b5ff5d0321b934c44614847e0ab65934651 Mon Sep 17 00:00:00 2001
From: Curtis Anderson <curtis@mlcommons.org>
Date: Tue, 28 Apr 2026 11:01:42 -0700
Subject: [PATCH 1/4] Use CLOSED/OPEN to dynamically set the args & usage
 message

---
 mlpstorage_py/cli/__init__.py              |  16 +-
 mlpstorage_py/cli/checkpointing_args.py    | 129 +++++++----
 mlpstorage_py/cli/common_args.py           | 151 ++++++-------
 mlpstorage_py/cli/kvcache_args.py          | 239 +++++++++++++--------
 mlpstorage_py/cli/lockfile_args.py         |   6 +-
 mlpstorage_py/cli/training_args.py         |  66 ++++--
 mlpstorage_py/cli/utility_args.py          |   8 +-
 mlpstorage_py/cli/vectordb_args.py         |  38 +++-
 mlpstorage_py/cli_parser.py                |  90 +++++---
 mlpstorage_py/config.py                    |   7 +-
 tests/README.md                            |   4 +-
 tests/configs/S3_TESTING_GUIDE.md          |  10 +-
 tests/fixtures/mock_collector.py           |   2 +-
 tests/fixtures/sample_data.py              |   6 +-
 tests/object-store/test_mlp_minio.sh       |   2 +-
 tests/object-store/test_mlp_s3dlio.sh      |   4 +-
 tests/object-store/test_mlp_s3torch.sh     |   2 +-
 tests/object-store/test_s3dlio_multilib.sh |   4 +-
 18 files changed, 482 insertions(+), 302 deletions(-)

diff --git a/mlpstorage_py/cli/__init__.py b/mlpstorage_py/cli/__init__.py
index 3575f1b0..92b68fd1 100755
--- a/mlpstorage_py/cli/__init__.py
+++ b/mlpstorage_py/cli/__init__.py
@@ -31,10 +31,10 @@
     add_dlio_arguments,
 )
 
-from mlpstorage_py.cli.training_args import add_training_arguments
-from mlpstorage_py.cli.checkpointing_args import add_checkpointing_arguments
-from mlpstorage_py.cli.vectordb_args import add_vectordb_arguments
-from mlpstorage_py.cli.kvcache_args import add_kvcache_arguments
+from mlpstorage_py.cli.training_args import add_training_arguments, validate_training_arguments
+from mlpstorage_py.cli.checkpointing_args import add_checkpointing_arguments, validate_checkpointing_arguments
+from mlpstorage_py.cli.vectordb_args import add_vectordb_arguments, validate_vectordb_arguments
+from mlpstorage_py.cli.kvcache_args import add_kvcache_arguments, validate_kvcache_arguments
 from mlpstorage_py.cli.utility_args import add_reports_arguments, add_history_arguments
 from mlpstorage_py.cli.lockfile_args import add_lockfile_arguments
 
@@ -47,10 +47,10 @@
     'add_host_arguments',
     'add_dlio_arguments',
     # Benchmark argument builders
-    'add_training_arguments',
-    'add_checkpointing_arguments',
-    'add_vectordb_arguments',
-    'add_kvcache_arguments',
+    'add_training_arguments',      'validate_training_arguments',
+    'add_checkpointing_arguments', 'validate_checkpointing_arguments',
+    'add_vectordb_arguments',      'validate_vectordb_arguments',
+    'add_kvcache_arguments',       'validate_kvcache_arguments',
     # Utility argument builders
     'add_reports_arguments',
     'add_history_arguments',
diff --git a/mlpstorage_py/cli/checkpointing_args.py b/mlpstorage_py/cli/checkpointing_args.py
index 8dbd56f5..600baff1 100755
--- a/mlpstorage_py/cli/checkpointing_args.py
+++ b/mlpstorage_py/cli/checkpointing_args.py
@@ -5,7 +5,9 @@
 including datasize and run commands.
 """
 
-from mlpstorage_py.config import DEFAULT_HOSTS, EXEC_TYPE
+import sys
+
+from mlpstorage_py.config import DEFAULT_HOSTS, EXEC_TYPE, LLM_MODELS, LLM_MODELS_CLOSED, EXIT_CODE
 from mlpstorage_py.cli.common_args import (
     HELP_MESSAGES,
     add_universal_arguments,
@@ -16,7 +18,7 @@
 )
 
 
-def add_checkpointing_arguments(parser):
+def add_checkpointing_arguments(parser, is_closed):
     """Add checkpointing benchmark arguments to the parser.
 
     Args:
@@ -37,8 +39,7 @@ def add_checkpointing_arguments(parser):
 
     # Common arguments for both datasize and run
     for _parser in [datasize, run_benchmark]:
-        add_host_arguments(_parser)
-
+        add_host_arguments(_parser, is_closed)
         _parser.add_argument(
             '--client-host-memory-in-gb', '-cm',
             type=int,
@@ -48,53 +49,89 @@ def add_checkpointing_arguments(parser):
 
         # Model argument - using help text with choices instead of choices param
         # to avoid very long help output
-        _parser.add_argument(
-            '--model', '-m',
-            required=True,
-            help=HELP_MESSAGES['llm_model']
-        )
+        if is_closed:
+            _parser.add_argument(
+                '--model', '-m',
+                choices=LLM_MODELS_CLOSED,
+                required=True,
+                help=HELP_MESSAGES['llm_model']
+            )
+
+        else:
+            _parser.add_argument(
+                '--model', '-m',
+                choices=LLM_MODELS,
+                required=True,
+                help=HELP_MESSAGES['llm_model']
+            )
+
+        if is_closed:
+            parser.set_defaults(
+                num_checkpoints_read=10,
+                num_checkpoints_write=10
+            )
+        else:
+            _parser.add_argument(
+                '--num-checkpoints-read', '-ncr',
+                type=int,
+                default=10,
+                help=HELP_MESSAGES['num_checkpoints']
+            )
+
+            _parser.add_argument(
+                '--num-checkpoints-write', '-ncw',
+                type=int,
+                default=10,
+                help=HELP_MESSAGES['num_checkpoints']
+            )
+
+            add_dlio_arguments(_parser, is_closed)
+
+            # Add exec-type and MPI arguments to both datasize and run
+            _parser.add_argument(
+                '--exec-type', '-et',
+                type=EXEC_TYPE,
+                choices=list(EXEC_TYPE),
+                default=EXEC_TYPE.MPI,
+                help=HELP_MESSAGES['exec_type']
+            )
+            add_mpi_arguments(_parser, is_closed)
+
+    run_benchmark.add_argument(
+        '--num-processes', '-np',
+        type=int,
+        required=True,
+        help=HELP_MESSAGES['num_checkpoint_accelerators']
+    )
 
-        _parser.add_argument(
-            '--num-checkpoints-read', '-ncr',
-            type=int,
-            default=10,
-            help=HELP_MESSAGES['num_checkpoints']
-        )
+    run_benchmark.add_argument(
+        "--checkpoint-folder", '-cf',
+        type=str,
+        required=True,
+        help=HELP_MESSAGES['checkpoint_folder']
+    )
 
-        _parser.add_argument(
-            '--num-checkpoints-write', '-ncw',
-            type=int,
-            default=10,
-            help=HELP_MESSAGES['num_checkpoints']
-        )
+    add_universal_arguments(_parser, True, is_closed)
 
-        _parser.add_argument(
-            '--num-processes', '-np',
-            type=int,
-            required=True,
-            help=HELP_MESSAGES['num_checkpoint_accelerators']
-        )
+    # Add time-series arguments to run command only
+    add_timeseries_arguments(run_benchmark, is_closed)
 
-        _parser.add_argument(
-            "--checkpoint-folder", '-cf',
-            type=str,
-            required=True,
-            help=HELP_MESSAGES['checkpoint_folder']
-        )
 
-        add_dlio_arguments(_parser)
+def validate_checkpointing_arguments(args):
+    """Validate the whole set of args given that we're doing a checkpointing benchmark
+    
+    Args:
+        args (argparse.Namespace): The parsed command-line arguments
+    """
+    error_messages = []
 
-        # Add exec-type and MPI arguments to both datasize and run
-        _parser.add_argument(
-            '--exec-type', '-et',
-            type=EXEC_TYPE,
-            choices=list(EXEC_TYPE),
-            default=EXEC_TYPE.MPI,
-            help=HELP_MESSAGES['exec_type']
-        )
-        add_mpi_arguments(_parser)
+    if args.model not in LLM_MODELS:
+        error_messages.append("Invalid LLM model. Supported models are: {}".format(", ".join(LLM_MODELS)))
+    if args.num_checkpoints_read < 0 or args.num_checkpoints_write < 0:
+        error_messages.append("Number of checkpoints read and write must be non-negative")
 
-        add_universal_arguments(_parser)
+    if error_messages:
+        for msg in error_messages:
+            print(msg)
 
-    # Add time-series arguments to run command only
-    add_timeseries_arguments(run_benchmark)
+        sys.exit(EXIT_CODE.INVALID_ARGUMENTS)
diff --git a/mlpstorage_py/cli/common_args.py b/mlpstorage_py/cli/common_args.py
index 1eaad497..f44460c4 100755
--- a/mlpstorage_py/cli/common_args.py
+++ b/mlpstorage_py/cli/common_args.py
@@ -9,7 +9,7 @@
 """
 
 from mlpstorage_py.config import (
-    CHECKPOINT_RANKS_STRINGS, MODELS, ACCELERATORS, DEFAULT_HOSTS,
+    CHECKPOINT_RANKS_STRINGS, MODELS, ACCELERATORS, ACCELERATORS_CLOSED, DEFAULT_HOSTS,
     LLM_MODELS_STRINGS, MPI_CMDS, EXEC_TYPE, DEFAULT_RESULTS_DIR,
     VECTOR_DTYPES, DISTRIBUTIONS
 )
@@ -167,7 +167,7 @@
 }
 
 
-def add_universal_arguments(parser):
+def add_universal_arguments(parser, offer_object, is_closed):
     """Add arguments common to all benchmarks and commands.
 
     Args:
@@ -180,12 +180,19 @@ def add_universal_arguments(parser):
         default=DEFAULT_RESULTS_DIR,
         help=HELP_MESSAGES['results_dir']
     )
-    standard_args.add_argument(
-        '--loops',
-        type=int,
-        default=1,
-        help="Number of times to run the benchmark"
-    )
+
+    if is_closed:
+        parser.set_defaults(
+            loops=1
+        )
+    else:
+        standard_args.add_argument(
+            '--loops',
+            type=int,
+            default=1,
+            help="Number of times to run the benchmark"
+        )
+
     standard_args.add_argument(
         '--config-file', '-c',
         type=str,
@@ -193,35 +200,26 @@ def add_universal_arguments(parser):
     )
 
     # Create a mutually exclusive group for file/object options
-    access_proto = standard_args.add_mutually_exclusive_group(required=True)
-    access_proto.add_argument(
-        "--file",
-        action="store_true",
-        help="Use POSIX files as the data access method"
-    )
-    access_proto.add_argument(
-        "--object",
-        nargs="?",
-        type=str,
-        const="s3",
-        choices=["s3"],
-        help="Use the given Object API as the data access method, defaults to S3"
-    )
-
-    # Create a mutually exclusive group for closed/open options
-    submission_group = standard_args.add_mutually_exclusive_group()
-    submission_group.add_argument(
-        "--open",
-        action="store_false",
-        dest="closed",
-        default=False,
-        help="Run as an open submission"
-    )
-    submission_group.add_argument(
-        "--closed",
-        action="store_true",
-        help="Run as a closed submission"
-    )
+    if offer_object:
+        access_proto = standard_args.add_mutually_exclusive_group(required=True)
+        access_proto.add_argument(
+            "--file",
+            action="store_true",
+            help="Use POSIX files as the data access method"
+        )
+        access_proto.add_argument(
+            "--object",
+            nargs="?",
+            type=str,
+            const="s3",
+            choices=["s3"],
+            help="Use the given Object API as the data access method, defaults to S3"
+        )
+    else:
+        parser.set_defaults(
+            file=True,
+            object=False
+        )
 
     output_control = parser.add_argument_group("Output Control")
     output_control.add_argument(
@@ -239,11 +237,13 @@ def add_universal_arguments(parser):
         type=str,
         default="INFO"
     )
-    output_control.add_argument(
-        "--allow-invalid-params", "-aip",
-        action="store_true",
-        help="Do not fail on invalid parameters."
-    )
+
+    if not is_closed:
+        output_control.add_argument(
+            "--allow-invalid-params", "-aip",
+            action="store_true",
+            help="Do not fail on invalid parameters."
+        )
 
     view_only_args = parser.add_argument_group("View Only")
     view_only_args.add_argument(
@@ -266,7 +266,7 @@ def add_universal_arguments(parser):
     )
 
 
-def add_mpi_arguments(parser):
+def add_mpi_arguments(parser, is_closed):
     """Add MPI-related arguments.
 
     Args:
@@ -296,7 +296,7 @@ def add_mpi_arguments(parser):
     )
 
 
-def add_host_arguments(parser, required=False):
+def add_host_arguments(parser, is_closed, required=False):
     """Add host-related arguments common to distributed benchmarks.
 
     Args:
@@ -312,7 +312,7 @@ def add_host_arguments(parser, required=False):
     )
 
 
-def add_dlio_arguments(parser):
+def add_dlio_arguments(parser, is_closed):
     """Add DLIO-related arguments.
 
     Args:
@@ -323,16 +323,22 @@ def add_dlio_arguments(parser):
         type=str,
         help="Path to DLIO binary. Default is the same as mlpstorage binary path"
     )
-    parser.add_argument(
-        '--params', '-p',
-        nargs="+",
-        type=str,
-        action="append",
-        help=HELP_MESSAGES['params']
-    )
-
 
-def add_timeseries_arguments(parser):
+    if is_closed:
+        parser.set_defaults(
+            params=''
+        )
+    else:
+        parser.add_argument(
+            '--params', '-p',
+            nargs="+",
+            type=str,
+            action="append",
+            help=HELP_MESSAGES['params']
+        )
+
+
+def add_timeseries_arguments(parser, is_closed):
     """Add time-series collection arguments.
 
     These arguments control the collection of time-series host metrics
@@ -341,21 +347,22 @@ def add_timeseries_arguments(parser):
     Args:
         parser: Argparse parser to add arguments to.
     """
-    timeseries_group = parser.add_argument_group("Time-Series Collection")
-    timeseries_group.add_argument(
-        '--timeseries-interval',
-        type=float,
-        default=10.0,
-        help=HELP_MESSAGES['timeseries_interval']
-    )
-    timeseries_group.add_argument(
-        '--skip-timeseries',
-        action='store_true',
-        help=HELP_MESSAGES['skip_timeseries']
-    )
-    timeseries_group.add_argument(
-        '--max-timeseries-samples',
-        type=int,
-        default=3600,
-        help=HELP_MESSAGES['max_timeseries_samples']
-    )
+    if not is_closed:
+        timeseries_group = parser.add_argument_group("Time-Series Collection")
+        timeseries_group.add_argument(
+            '--timeseries-interval',
+            type=float,
+            default=10.0,
+            help=HELP_MESSAGES['timeseries_interval']
+        )
+        timeseries_group.add_argument(
+            '--skip-timeseries',
+            action='store_true',
+            help=HELP_MESSAGES['skip_timeseries']
+        )
+        timeseries_group.add_argument(
+            '--max-timeseries-samples',
+            type=int,
+            default=3600,
+            help=HELP_MESSAGES['max_timeseries_samples']
+        )
diff --git a/mlpstorage_py/cli/kvcache_args.py b/mlpstorage_py/cli/kvcache_args.py
index c2494f04..7a7f2d38 100755
--- a/mlpstorage_py/cli/kvcache_args.py
+++ b/mlpstorage_py/cli/kvcache_args.py
@@ -5,12 +5,16 @@
 including run and datasize commands for LLM inference storage testing.
 """
 
+import sys
+
 from mlpstorage_py.config import (
     KVCACHE_MODELS,
+    KVCACHE_MODEL_DEFAULT,
     KVCACHE_PERFORMANCE_PROFILES,
     KVCACHE_GENERATION_MODES,
     KVCACHE_DEFAULT_DURATION,
     EXEC_TYPE,
+    EXIT_CODE,
 )
 from mlpstorage_py.cli.common_args import (
     HELP_MESSAGES,
@@ -63,7 +67,7 @@
 }
 
 
-def add_kvcache_arguments(parser):
+def add_kvcache_arguments(parser, is_closed):
     """Add KV Cache benchmark arguments to the parser.
 
     Args:
@@ -84,66 +88,80 @@ def add_kvcache_arguments(parser):
 
     # Add arguments to both run and datasize commands
     for _parser in [run_benchmark, datasize]:
-        _add_kvcache_model_arguments(_parser)
-        _add_kvcache_cache_arguments(_parser)
-        add_universal_arguments(_parser)
+        _add_kvcache_model_arguments(_parser, is_closed)
+        _add_kvcache_cache_arguments(_parser, is_closed)
+        add_universal_arguments(_parser, False, is_closed)
 
     # Run-specific arguments
-    _add_kvcache_run_arguments(run_benchmark)
-    _add_kvcache_optional_features(run_benchmark)
+    _add_kvcache_run_arguments(run_benchmark, is_closed)
+    _add_kvcache_optional_features(run_benchmark, is_closed)
 
     # Add distributed execution arguments to run command only
-    _add_kvcache_distributed_arguments(run_benchmark)
+    _add_kvcache_distributed_arguments(run_benchmark, is_closed)
+
 
 
-def _add_kvcache_model_arguments(parser):
+def _add_kvcache_model_arguments(parser, is_closed):
     """Add model configuration arguments.
 
     Args:
         parser: Argparse parser to add arguments to.
     """
-    model_group = parser.add_argument_group("Model Configuration")
-    model_group.add_argument(
-        '--model', '-m',
-        choices=KVCACHE_MODELS,
-        default='llama3.1-8b',
-        help=KVCACHE_HELP_MESSAGES['kvcache_model']
-    )
-    model_group.add_argument(
-        '--num-users', '-nu',
-        type=int,
-        default=100,
-        help=KVCACHE_HELP_MESSAGES['num_users']
-    )
+    if is_closed:
+        # Hardcode the default values into the namespace without presenting CLI options
+        parser.set_defaults(
+            model=KVCACHE_MODEL_DEFAULT,
+            num_users=100
+        )
+    else:
+        model_group = parser.add_argument_group("Model Configuration")
+        model_group.add_argument(
+            '--model', '-m',
+            choices=KVCACHE_MODELS,
+            default='llama3.1-8b',
+            help=KVCACHE_HELP_MESSAGES['kvcache_model']
+        )
+        model_group.add_argument(
+            '--num-users', '-nu',
+            type=int,
+            default=100,
+            help=KVCACHE_HELP_MESSAGES['num_users']
+        )
 
 
-def _add_kvcache_cache_arguments(parser):
+def _add_kvcache_cache_arguments(parser, is_closed):
     """Add cache tier configuration arguments.
 
     Args:
         parser: Argparse parser to add arguments to.
     """
     cache_group = parser.add_argument_group("Cache Configuration")
-    cache_group.add_argument(
-        '--gpu-mem-gb',
-        type=float,
-        default=16.0,
-        help=KVCACHE_HELP_MESSAGES['gpu_mem_gb']
-    )
-    cache_group.add_argument(
-        '--cpu-mem-gb',
-        type=float,
-        default=32.0,
-        help=KVCACHE_HELP_MESSAGES['cpu_mem_gb']
-    )
     cache_group.add_argument(
         '--cache-dir',
         type=str,
         help=KVCACHE_HELP_MESSAGES['cache_dir']
     )
+    if is_closed:
+        cache_group.set_defaults(
+            gpu_mem_gb=16.0,
+            cpu_mem_gb=32.0
+        )
+    else:
+        cache_group.add_argument(
+            '--gpu-mem-gb',
+            type=float,
+            default=16.0,
+            help=KVCACHE_HELP_MESSAGES['gpu_mem_gb']
+        )
+        cache_group.add_argument(
+            '--cpu-mem-gb',
+            type=float,
+            default=32.0,
+            help=KVCACHE_HELP_MESSAGES['cpu_mem_gb']
+        )
 
 
-def _add_kvcache_run_arguments(parser):
+def _add_kvcache_run_arguments(parser, is_closed):
     """Add run-specific arguments.
 
     Args:
@@ -151,77 +169,94 @@ def _add_kvcache_run_arguments(parser):
     """
     run_group = parser.add_argument_group("Run Configuration")
     run_group.add_argument(
-        '--duration', '-d',
-        type=int,
-        default=KVCACHE_DEFAULT_DURATION,
-        help=KVCACHE_HELP_MESSAGES['duration']
-    )
-    run_group.add_argument(
-        '--generation-mode',
-        choices=KVCACHE_GENERATION_MODES,
-        default='realistic',
-        help=KVCACHE_HELP_MESSAGES['generation_mode']
-    )
-    run_group.add_argument(
-        '--performance-profile',
-        choices=KVCACHE_PERFORMANCE_PROFILES,
-        default='latency',
-        help=KVCACHE_HELP_MESSAGES['performance_profile']
+        '--kvcache-bin-path',
+        type=str,
+        help=KVCACHE_HELP_MESSAGES['kvcache_bin_path']
     )
     run_group.add_argument(
         '--seed',
         type=int,
         help=KVCACHE_HELP_MESSAGES['seed']
     )
-    run_group.add_argument(
-        '--kvcache-bin-path',
-        type=str,
-        help=KVCACHE_HELP_MESSAGES['kvcache_bin_path']
-    )
 
+    if is_closed:
+        run_group.set_defaults(
+            duration=KVCACHE_DEFAULT_DURATION,
+            generation_mode='realistic',
+            performance_profile='latency'
+        )
+    else:
+        run_group.add_argument(
+            '--duration', '-d',
+            type=int,
+            default=KVCACHE_DEFAULT_DURATION,
+            help=KVCACHE_HELP_MESSAGES['duration']
+        )
+        run_group.add_argument(
+            '--generation-mode',
+            choices=KVCACHE_GENERATION_MODES,
+            default='realistic',
+            help=KVCACHE_HELP_MESSAGES['generation_mode']
+        )
+        run_group.add_argument(
+            '--performance-profile',
+            choices=KVCACHE_PERFORMANCE_PROFILES,
+            default='latency',
+            help=KVCACHE_HELP_MESSAGES['performance_profile']
+        )
 
-def _add_kvcache_optional_features(parser):
+def _add_kvcache_optional_features(parser, is_closed):
     """Add optional feature flags.
 
     Args:
         parser: Argparse parser to add arguments to.
     """
     features_group = parser.add_argument_group("Optional Features")
-    features_group.add_argument(
-        '--disable-multi-turn',
-        action='store_true',
-        help=KVCACHE_HELP_MESSAGES['disable_multi_turn']
-    )
-    features_group.add_argument(
-        '--disable-prefix-caching',
-        action='store_true',
-        help=KVCACHE_HELP_MESSAGES['disable_prefix_caching']
-    )
-    features_group.add_argument(
-        '--enable-rag',
-        action='store_true',
-        help=KVCACHE_HELP_MESSAGES['enable_rag']
-    )
-    features_group.add_argument(
-        '--rag-num-docs',
-        type=int,
-        default=10,
-        help=KVCACHE_HELP_MESSAGES['rag_num_docs']
-    )
-    features_group.add_argument(
-        '--enable-autoscaling',
-        action='store_true',
-        help=KVCACHE_HELP_MESSAGES['enable_autoscaling']
-    )
-    features_group.add_argument(
-        '--autoscaler-mode',
-        choices=['qos', 'predictive'],
-        default='qos',
-        help=KVCACHE_HELP_MESSAGES['autoscaler_mode']
-    )
+    if is_closed:
+        features_group.set_defaults(
+                disable_multi_turn=False,
+                disable_prefix_caching=False,
+                enable_rag=True,
+                rag_num_docs=10,
+                enable_autoscaling=True,
+                autoscaler_mode='qos'
+        )
+    else:
+            features_group.add_argument(
+                '--disable-multi-turn',
+                action='store_true',
+                help=KVCACHE_HELP_MESSAGES['disable_multi_turn']
+            )
+            features_group.add_argument(
+                '--disable-prefix-caching',
+                action='store_true',
+                help=KVCACHE_HELP_MESSAGES['disable_prefix_caching']
+            )
+            features_group.add_argument(
+                '--enable-rag',
+                action='store_true',
+                help=KVCACHE_HELP_MESSAGES['enable_rag']
+            )
+            features_group.add_argument(
+                '--rag-num-docs',
+                type=int,
+                default=10,
+                help=KVCACHE_HELP_MESSAGES['rag_num_docs']
+            )
+            features_group.add_argument(
+                '--enable-autoscaling',
+                action='store_true',
+                help=KVCACHE_HELP_MESSAGES['enable_autoscaling']
+            )
+            features_group.add_argument(
+                '--autoscaler-mode',
+                choices=['qos', 'predictive'],
+                default='qos',
+                help=KVCACHE_HELP_MESSAGES['autoscaler_mode']
+            )
 
 
-def _add_kvcache_distributed_arguments(parser):
+def _add_kvcache_distributed_arguments(parser, is_closed):
     """Add distributed execution arguments for multi-host benchmarking.
 
     Args:
@@ -242,10 +277,28 @@ def _add_kvcache_distributed_arguments(parser):
     )
 
     # Add host arguments from common_args
-    add_host_arguments(parser)
+    add_host_arguments(parser, is_closed)
 
     # Add MPI arguments from common_args
-    add_mpi_arguments(parser)
+    add_mpi_arguments(parser, is_closed)
 
     # Add time-series arguments
-    add_timeseries_arguments(parser)
+    add_timeseries_arguments(parser, is_closed)
+
+
+def validate_kvcache_arguments(args):
+    """Validate the whole set of args given that we're doing a kvcache benchmark
+    
+    Args:
+        args (argparse.Namespace): The parsed command-line arguments
+    """
+    error_messages = []
+
+    if args.data_access_protocol != 'file':
+        error_messages.append("KVCache only supports POSIX file storage, ie: --object= is not supported")
+
+    if error_messages:
+        for msg in error_messages:
+            print(msg)
+
+        sys.exit(EXIT_CODE.INVALID_ARGUMENTS)
diff --git a/mlpstorage_py/cli/lockfile_args.py b/mlpstorage_py/cli/lockfile_args.py
index 905a5a61..5755a6b2 100755
--- a/mlpstorage_py/cli/lockfile_args.py
+++ b/mlpstorage_py/cli/lockfile_args.py
@@ -9,7 +9,7 @@
 from mlpstorage_py.cli.common_args import add_universal_arguments
 
 
-def add_lockfile_arguments(parser):
+def add_lockfile_arguments(parser, is_closed):
     """Add lockfile subcommands to the parser.
 
     Args:
@@ -54,7 +54,7 @@ def add_lockfile_arguments(parser):
         dest="generate_all",
         help="Generate both requirements.txt and requirements-full.txt",
     )
-    add_universal_arguments(generate_parser)
+    add_universal_arguments(generate_parser, True, is_closed)
 
     # Verify subcommand
     verify_parser = subparsers.add_parser(
@@ -83,6 +83,6 @@ def add_lockfile_arguments(parser):
         action="store_true",
         help="Fail on any difference (default: fail only on version mismatch)",
     )
-    add_universal_arguments(verify_parser)
+    add_universal_arguments(verify_parser, True, is_closed)
 
     return parser
diff --git a/mlpstorage_py/cli/training_args.py b/mlpstorage_py/cli/training_args.py
index 96d11cd7..b289b19d 100755
--- a/mlpstorage_py/cli/training_args.py
+++ b/mlpstorage_py/cli/training_args.py
@@ -5,7 +5,11 @@
 including datasize, datagen, run, and configview commands.
 """
 
-from mlpstorage_py.config import MODELS, ACCELERATORS, DEFAULT_HOSTS, EXEC_TYPE
+from mlpstorage_py.config import (
+    MODELS, MODELS_CLOSED, ACCELERATORS, ACCELERATORS_CLOSED,
+    DEFAULT_HOSTS, EXEC_TYPE, EXIT_CODE
+)
+
 from mlpstorage_py.cli.common_args import (
     HELP_MESSAGES,
     add_universal_arguments,
@@ -16,7 +20,7 @@
 )
 
 
-def add_training_arguments(parser):
+def add_training_arguments(parser, is_closed):
     """Add training benchmark arguments to the parser.
 
     Args:
@@ -45,13 +49,21 @@ def add_training_arguments(parser):
 
     # Common arguments for datasize, datagen, and run
     for _parser in [datasize, datagen, run_benchmark]:
-        add_host_arguments(_parser)
-        _parser.add_argument(
-            '--model', '-m',
-            choices=MODELS,
-            required=True,
-            help=HELP_MESSAGES['model']
-        )
+        add_host_arguments(_parser, is_closed)
+        if is_closed:
+            _parser.add_argument(
+                '--model', '-m',
+                choices=MODELS_CLOSED,
+                required=True,
+                help=HELP_MESSAGES['model']
+            )
+        else:
+            _parser.add_argument(
+                '--model', '-m',
+                choices=MODELS,
+                required=True,
+                help=HELP_MESSAGES['model']
+            )
 
         # Memory argument (not for datagen)
         if _parser != datagen:
@@ -70,7 +82,7 @@ def add_training_arguments(parser):
             help=HELP_MESSAGES['exec_type']
         )
 
-        add_mpi_arguments(_parser)
+        add_mpi_arguments(_parser, is_closed)
 
     # Command-specific process count arguments
     datagen.add_argument(
@@ -100,12 +112,20 @@ def add_training_arguments(parser):
 
     # Accelerator type and num client hosts for datasize and run
     for _parser in [datasize, run_benchmark]:
-        _parser.add_argument(
-            '--accelerator-type', '-g',
-            choices=ACCELERATORS,
-            required=True,
-            help=HELP_MESSAGES['accelerator_type']
-        )
+        if is_closed:
+            _parser.add_argument(
+                '--accelerator-type', '-g',
+                choices=ACCELERATORS_CLOSED,
+                required=True,
+                help=HELP_MESSAGES['accelerator_type']
+            )
+        else:
+            _parser.add_argument(
+                '--accelerator-type', '-g',
+                choices=ACCELERATORS,
+                required=True,
+                help=HELP_MESSAGES['accelerator_type']
+            )
         _parser.add_argument(
             '--num-client-hosts', '-nc',
             type=int,
@@ -119,8 +139,16 @@ def add_training_arguments(parser):
             type=str,
             help="Filesystem location for data"
         )
-        add_dlio_arguments(_parser)
-        add_universal_arguments(_parser)
+        add_dlio_arguments(_parser, is_closed)
+        add_universal_arguments(_parser, True, is_closed)
 
     # Add time-series arguments to run command only
-    add_timeseries_arguments(run_benchmark)
+    add_timeseries_arguments(run_benchmark, is_closed)
+
+
+def validate_training_arguments(args):
+    """Validate the whole set of args given that we're doing a training benchmark
+    
+    Args:
+        args (argparse.Namespace): The parsed command-line arguments
+    """
diff --git a/mlpstorage_py/cli/utility_args.py b/mlpstorage_py/cli/utility_args.py
index 5caa8fac..b55c87a2 100755
--- a/mlpstorage_py/cli/utility_args.py
+++ b/mlpstorage_py/cli/utility_args.py
@@ -11,7 +11,7 @@
 )
 
 
-def add_reports_arguments(parser):
+def add_reports_arguments(parser, is_closed):
     """Add reports command arguments to the parser.
 
     Args:
@@ -35,10 +35,10 @@ def add_reports_arguments(parser):
         help=HELP_MESSAGES['output_dir']
     )
 
-    add_universal_arguments(reportgen)
+    add_universal_arguments(reportgen, True, is_closed)
 
 
-def add_history_arguments(parser):
+def add_history_arguments(parser, is_closed):
     """Add history command arguments to the parser.
 
     Args:
@@ -77,4 +77,4 @@ def add_history_arguments(parser):
     )
 
     for _parser in [history, rerun]:
-        add_universal_arguments(_parser)
+        add_universal_arguments(_parser, True, is_closed)
diff --git a/mlpstorage_py/cli/vectordb_args.py b/mlpstorage_py/cli/vectordb_args.py
index 1553780d..64ee5a46 100755
--- a/mlpstorage_py/cli/vectordb_args.py
+++ b/mlpstorage_py/cli/vectordb_args.py
@@ -4,10 +4,10 @@
 This module defines the CLI arguments for the VectorDB benchmark,
 including datasize, datagen, and run commands.
 """
- 
+
 from mlpstorage_py.config import (
     VECTOR_DTYPES, DISTRIBUTIONS, VECTORDB_DEFAULT_RUNTIME,
-    VDB_INDEX_TYPES, VDB_BENCHMARK_MODES,
+    VDB_INDEX_TYPES, VDB_INDEX_TYPES_CLOSED, VDB_BENCHMARK_MODES, EXIT_CODE
 )
 from mlpstorage_py.cli.common_args import (
     HELP_MESSAGES,
@@ -16,7 +16,7 @@
 )
  
  
-def add_vectordb_arguments(parser):
+def add_vectordb_arguments(parser, is_closed):
     """Add VectorDB benchmark arguments to the parser.
  
     Args:
@@ -79,12 +79,20 @@ def add_vectordb_arguments(parser):
         default=1_000_000,
         help=HELP_MESSAGES['num_vectors']
     )
-    datasize.add_argument(
-        '--index-type',
-        choices=VDB_INDEX_TYPES,
-        default="DISKANN",
-        help="Index type for storage estimation"
-    )
+    if is_closed:
+        datasize.add_argument(
+            '--index-type',
+            choices=VDB_INDEX_TYPES_CLOSED,
+            default="DISKANN",
+            help="Index type for storage estimation"
+        )
+    else:
+        datasize.add_argument(
+            '--index-type',
+            choices=VDB_INDEX_TYPES,
+            default="DISKANN",
+            help="Index type for storage estimation"
+        )
     datasize.add_argument(
         '--num-shards',
         type=int,
@@ -192,7 +200,15 @@ def add_vectordb_arguments(parser):
  
     # Add universal arguments to all subcommands
     for _parser in [datasize, datagen, run_benchmark]:
-        add_universal_arguments(_parser)
+        add_universal_arguments(_parser, True, is_closed)
  
     # Add time-series arguments to run command only
-    add_timeseries_arguments(run_benchmark)
+    add_timeseries_arguments(run_benchmark, is_closed)
+
+ 
+def validate_vectordb_arguments(args):
+    """Validate the whole set of args given that we're doing a vectordb benchmark
+    
+    Args:
+        args (argparse.Namespace): The parsed command-line arguments
+    """
diff --git a/mlpstorage_py/cli_parser.py b/mlpstorage_py/cli_parser.py
index af32669f..fae56443 100755
--- a/mlpstorage_py/cli_parser.py
+++ b/mlpstorage_py/cli_parser.py
@@ -17,10 +17,10 @@
     HELP_MESSAGES,
     PROGRAM_DESCRIPTIONS,
     add_universal_arguments,
-    add_training_arguments,
-    add_checkpointing_arguments,
-    add_vectordb_arguments,
-    add_kvcache_arguments,
+    add_training_arguments,      validate_training_arguments,
+    add_checkpointing_arguments, validate_checkpointing_arguments,
+    add_vectordb_arguments,      validate_vectordb_arguments,
+    add_kvcache_arguments,       validate_kvcache_arguments,
     add_reports_arguments,
     add_history_arguments,
     add_lockfile_arguments,
@@ -30,14 +30,42 @@
 help_messages = HELP_MESSAGES
 prog_descriptions = PROGRAM_DESCRIPTIONS
 
+
 def parse_arguments():
     """Parse command-line arguments for MLPerf Storage benchmarks.
 
     Returns:
         argparse.Namespace: Parsed and validated arguments.
     """
-    parser = argparse.ArgumentParser(prog="mlpstorage", description="Script to launch the MLPerf Storage benchmark")
+    # 1. Process the "open" / "closed" mode argument.
+    # It must be the first argument if present.
+    is_open = False
+    is_closed = False
+    
+    if len(sys.argv) > 1 and sys.argv[1] in ["open", "closed"]:
+        mode = sys.argv.pop(1)
+        if mode == "open":
+            is_open = True
+        else:
+            is_closed = True
+    else:
+        # Default to closed if neither is present
+        is_open = False
+        is_closed = True
+
+    if is_closed:
+        print(f"Note: running in a 'closed' configuration, some options are limited or forced to default values")
+    else:
+        print(f"Note: running in an 'open' configuration, all options are available")
+
+    # Note: Added usage string to document the new positional argument
+    parser = argparse.ArgumentParser(
+        prog="mlpstorage", 
+        usage="%(prog)s [open|closed] <program> [options]",
+        description="Script to launch the MLPerf Storage benchmark"
+    )
     parser.add_argument("--version", action="version", version=f"%(prog)s {VERSION}")
+
     sub_programs = parser.add_subparsers(dest="program", required=True)
     sub_programs.required = True
 
@@ -89,14 +117,14 @@ def parse_arguments():
         'lockfile': lockfile_parsers,
     }
 
-    # Add arguments using modular builders from cli package
-    add_training_arguments(training_parsers)
-    add_checkpointing_arguments(checkpointing_parsers)
-    add_vectordb_arguments(vectordb_parsers)
-    add_kvcache_arguments(kvcache_parsers)
-    add_reports_arguments(reports_parsers)
-    add_history_arguments(history_parsers)
-    add_lockfile_arguments(lockfile_parsers)
+    # Use existing argument builders
+    add_training_arguments(training_parsers, is_closed)
+    add_checkpointing_arguments(checkpointing_parsers, is_closed)
+    add_vectordb_arguments(vectordb_parsers, is_closed)
+    add_kvcache_arguments(kvcache_parsers, is_closed)
+    add_reports_arguments(reports_parsers, is_closed)
+    add_history_arguments(history_parsers, is_closed)
+    add_lockfile_arguments(lockfile_parsers, is_closed)
 
     # Universal arguments are added within each argument builder now
     # (except for top-level parsers that need them)
@@ -110,7 +138,11 @@ def parse_arguments():
         sys.exit(1)
 
     parsed_args = parser.parse_args()
-    
+
+    # 3. Record the requested attributes onto the parsed namespace
+    parsed_args.open = is_open
+    parsed_args.closed = is_closed
+
     # Apply YAML config file overrides if specified
     if hasattr(parsed_args, 'config_file') and parsed_args.config_file:
         parsed_args = apply_yaml_config_overrides(parsed_args)
@@ -126,10 +158,11 @@ def parse_arguments():
     """
     print(f"Arguments found: {parsed_args}")
     """
-
     validate_args(parsed_args)
+
     return parsed_args
 
+
 def apply_yaml_config_overrides(args):
     """
     Apply overrides from a YAML config file to the parsed arguments.
@@ -198,19 +231,20 @@ def apply_yaml_config_overrides(args):
 
 
 def validate_args(args):
-    error_messages = []
-    # Add generic validations here. Workload specific validation is in the Benchmark classes
-    if args.program == "checkpointing":
-        if args.model not in LLM_MODELS:
-            error_messages.append("Invalid LLM model. Supported models are: {}".format(", ".join(LLM_MODELS)))
-        if args.num_checkpoints_read < 0 or args.num_checkpoints_write < 0:
-            error_messages.append("Number of checkpoints read and write must be non-negative")
-
-    if error_messages:
-        for msg in error_messages:
-            print(msg)
-
-        sys.exit(EXIT_CODE.INVALID_ARGUMENTS)
+    """Validate the whole set of args for the different arg suites
+    
+    Args:
+        args (argparse.Namespace): The parsed command-line arguments
+    """
+    # Validate arguments using checkers from cli package
+    if args.program == 'training':
+        validate_training_arguments(args)
+    if args.program == 'checkpointing':
+        validate_checkpointing_arguments(args)
+    if args.program == 'vectordb':
+        validate_vectordb_arguments(args)
+    if args.program == 'kvcache':
+        validate_kvcache_arguments(args)
 
 
 def update_args(args):
diff --git a/mlpstorage_py/config.py b/mlpstorage_py/config.py
index fb0103fc..6ea5f420 100755
--- a/mlpstorage_py/config.py
+++ b/mlpstorage_py/config.py
@@ -52,12 +52,14 @@ def get_datetime_string():
 RETINANET = "retinanet"
 FLUX = "flux"
 MODELS = [COSMOFLOW, RESNET, UNET, DLRM, RETINANET, FLUX]
+MODELS_CLOSED = [DLRM, RETINANET, FLUX]
 
 H100 = "h100"
 A100 = "a100"
 B200 = "b200"
 MI355 = "mi355"
 ACCELERATORS = [H100, A100, B200, MI355]
+ACCELERATORS_CLOSED = [B200, MI355]
 
 OPEN = "open"
 CLOSED = "closed"
@@ -68,6 +70,7 @@ def get_datetime_string():
 LLAMA3_405B = 'llama3-405b'
 LLAMA3_1T = 'llama3-1t'
 LLM_MODELS = [LLAMA3_70B, LLAMA3_405B, LLAMA3_1T, LLAMA3_8B]
+LLM_MODELS_CLOSED = LLM_MODELS
 
 LLM_SUBSET_PROCS = 8
 # Defined as (MinProcs, ZeroLevel, GPU per Data Parallel Instance, Closed GPU Count)
@@ -93,11 +96,12 @@ def get_datetime_string():
 LLM_MODELS_STRINGS = "\n    ".join(LLM_MODELS)
 
 # KV Cache benchmark model configurations
+KVCACHE_MODEL_DEFAULT = 'llama3.1-8b'
 KVCACHE_MODELS = [
     'tiny-1b',
     'mistral-7b',
     'llama2-7b',
-    'llama3.1-8b',
+    KVCACHE_MODEL_DEFAULT,
     'llama3.1-70b-instruct',
 ]
 
@@ -112,6 +116,7 @@ def get_datetime_string():
 
 # VDB Benchmark Configuration
 VDB_INDEX_TYPES = ["DISKANN", "HNSW", "AISAQ", "IVF_FLAT", "IVF_SQ8", "FLAT"]
+VDB_INDEX_TYPES_CLOSED = ["DISKANN", "HNSW", "AISAQ"]
 VDB_ORCHESTRATION_MODES = ["ssh", "mpi"]
 VDB_BENCHMARK_MODES = ["timed", "query_count", "sweep"]
 
diff --git a/tests/README.md b/tests/README.md
index de8189e4..df196205 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -521,7 +521,7 @@ checkpoint read.  Aggregate throughput reported by `[METRIC]` lines from the ben
 cd /home/eval/Documents/Code/mlp-storage
 source .venv/bin/activate
 
-mlpstorage checkpointing run \
+mlpstorage open checkpointing run \
   --model llama3-8b --num-processes 8 \
   --client-host-memory-in-gb 64 \
   --num-checkpoints-write 1 --num-checkpoints-read 1 \
@@ -547,7 +547,7 @@ raw storage throughput.
 cd /home/eval/Documents/Code/mlp-storage
 source .venv/bin/activate
 
-mlpstorage checkpointing run \
+mlpstorage open checkpointing run \
   --model llama3-8b --num-processes 8 \
   --client-host-memory-in-gb 64 \
   --num-checkpoints-write 1 --num-checkpoints-read 1 \
diff --git a/tests/configs/S3_TESTING_GUIDE.md b/tests/configs/S3_TESTING_GUIDE.md
index 0a749527..6de08060 100644
--- a/tests/configs/S3_TESTING_GUIDE.md
+++ b/tests/configs/S3_TESTING_GUIDE.md
@@ -73,7 +73,7 @@ source .venv/bin/activate
 export DLIO_S3_IMPLEMENTATION=mlp
 
 # Generate small test dataset
-mlpstorage training datagen \
+mlpstorage open training datagen \
   --model unet3d \
   --config test_configs/s3_test_mlp_s3dlio.yaml \
   --param dataset.num_files_train=10
@@ -103,7 +103,7 @@ mc ls local/test-bucket/dlio-test/train/
 ```bash
 export DLIO_S3_IMPLEMENTATION=mlp
 
-mlpstorage training datagen \
+mlpstorage open training datagen \
   --model unet3d \
   --config test_configs/s3_test_mlp_s3torchconnector.yaml \
   --param dataset.num_files_train=10
@@ -127,7 +127,7 @@ mc ls local/test-bucket/dlio-test/train/
 ```bash
 export DLIO_S3_IMPLEMENTATION=mlp
 
-mlpstorage training datagen \
+mlpstorage open training datagen \
   --model unet3d \
   --config test_configs/s3_test_mlp_minio.yaml \
   --param dataset.num_files_train=10
@@ -151,7 +151,7 @@ mc ls local/test-bucket/dlio-test/train/
 ```bash
 export DLIO_S3_IMPLEMENTATION=dpsi
 
-mlpstorage training datagen \
+mlpstorage open training datagen \
   --model unet3d \
   --config test_configs/s3_test_dpsi.yaml \
   --param dataset.num_files_train=10
@@ -183,7 +183,7 @@ mc ls local/test-bucket/dlio-test-dpsi/train/
 
 ```bash
 # Add --param workflow.train=true to test read performance
-mlpstorage training run \
+mlpstorage open training run \
   --model unet3d \
   --config test_configs/s3_test_mlp_s3dlio.yaml \
   --param workflow.generate_data=false \
diff --git a/tests/fixtures/mock_collector.py b/tests/fixtures/mock_collector.py
index f88168ca..522bad19 100755
--- a/tests/fixtures/mock_collector.py
+++ b/tests/fixtures/mock_collector.py
@@ -9,7 +9,7 @@
 from typing import Dict, Any, List, Optional
 from datetime import datetime
 
-from mlpstorage.interfaces.collector import (
+from mlpstorage_py.interfaces.collector import (
     ClusterCollectorInterface,
     CollectionResult,
 )
diff --git a/tests/fixtures/sample_data.py b/tests/fixtures/sample_data.py
index deb576b1..86018ef3 100755
--- a/tests/fixtures/sample_data.py
+++ b/tests/fixtures/sample_data.py
@@ -157,7 +157,7 @@ def create_sample_cluster_info(
     Returns:
         ClusterInformation instance with mock data.
     """
-    from mlpstorage.rules import ClusterInformation, HostInfo, HostMemoryInfo
+    from mlpstorage_py.rules import ClusterInformation, HostInfo, HostMemoryInfo
 
     if logger is None:
         logger = MagicMock()
@@ -275,8 +275,8 @@ def create_sample_benchmark_run_data(
     Returns:
         BenchmarkRunData instance.
     """
-    from mlpstorage.rules import BenchmarkRunData
-    from mlpstorage.config import BENCHMARK_TYPES
+    from mlpstorage_py.rules import BenchmarkRunData
+    from mlpstorage_py.config import BENCHMARK_TYPES
 
     # Map string to enum
     type_map = {
diff --git a/tests/object-store/test_mlp_minio.sh b/tests/object-store/test_mlp_minio.sh
index 77471bbb..2e771eb5 100755
--- a/tests/object-store/test_mlp_minio.sh
+++ b/tests/object-store/test_mlp_minio.sh
@@ -59,7 +59,7 @@ echo "Step 2: Verifying bucket is empty..."
 echo ""
 
 echo "Step 3: Running data generation..."
-DLIO_S3_IMPLEMENTATION=mlp mlpstorage training datagen \
+DLIO_S3_IMPLEMENTATION=mlp mlpstorage open training datagen \
   --model unet3d -np 1 -dd "${DATA_DIR}" \
   --param ${COMMON_PARAMS} ${s3_params}
 
diff --git a/tests/object-store/test_mlp_s3dlio.sh b/tests/object-store/test_mlp_s3dlio.sh
index 523cbe96..518212f8 100755
--- a/tests/object-store/test_mlp_s3dlio.sh
+++ b/tests/object-store/test_mlp_s3dlio.sh
@@ -59,7 +59,7 @@ echo ""
 
 echo "Step 3: Running data generation..."
 set +e  # s3dlio compat layer may still have issues — capture result rather than abort
-DLIO_S3_IMPLEMENTATION=mlp mlpstorage training datagen \
+DLIO_S3_IMPLEMENTATION=mlp mlpstorage open training datagen \
   --model unet3d -np 8 -dd "${DATA_DIR}" \
   --param ${COMMON_PARAMS} ${s3_params}
 
@@ -77,7 +77,7 @@ if [ $RESULT -eq 0 ]; then
     echo "Step 6: Running training..."
     set +e
     export DLIO_S3_IMPLEMENTATION=mlp
-    mlpstorage training run \
+    mlpstorage open training run \
       --model unet3d --allow-run-as-root --skip-validation \
       --num-accelerators 1 --accelerator-type h100 --client-host-memory-in-gb 512 \
       --param ${COMMON_PARAMS} ${s3_params} \
diff --git a/tests/object-store/test_mlp_s3torch.sh b/tests/object-store/test_mlp_s3torch.sh
index e36ccaa1..8794b7e7 100755
--- a/tests/object-store/test_mlp_s3torch.sh
+++ b/tests/object-store/test_mlp_s3torch.sh
@@ -59,7 +59,7 @@ echo "Step 2: Verifying bucket is empty..."
 echo ""
 
 echo "Step 3: Running data generation..."
-DLIO_S3_IMPLEMENTATION=mlp mlpstorage training datagen \
+DLIO_S3_IMPLEMENTATION=mlp mlpstorage open training datagen \
   --model unet3d -np 1 -dd "${DATA_DIR}" \
   --param ${COMMON_PARAMS} ${s3_params}
 
diff --git a/tests/object-store/test_s3dlio_multilib.sh b/tests/object-store/test_s3dlio_multilib.sh
index ac879764..bd6962ed 100644
--- a/tests/object-store/test_s3dlio_multilib.sh
+++ b/tests/object-store/test_s3dlio_multilib.sh
@@ -57,7 +57,7 @@ echo "Step 2: Data generation with s3dlio..."
 # Use storage.storage_library to select s3dlio
 s3_params="storage.storage_type=s3 storage.storage_library=s3dlio storage.storage_options.endpoint_url=${AWS_ENDPOINT_URL} storage.storage_options.access_key_id=${AWS_ACCESS_KEY_ID} storage.storage_options.secret_access_key=${AWS_SECRET_ACCESS_KEY} storage.storage_root=${S3_BUCKET} storage.storage_options.s3_force_path_style=true"
 
-mlpstorage training datagen \
+mlpstorage open training datagen \
   --model unet3d \
   --num-processes 1 \
   --params dataset.num_files_train=${NUM_FILES} \
@@ -78,7 +78,7 @@ echo "Step 3: Verify S3 data with s3-cli..."
 echo ""
 
 echo "Step 4: Training (5 epochs) with s3dlio..."
-timeout 300 mlpstorage training run \
+timeout 300 mlpstorage open training run \
   --model unet3d \
   --num-accelerators=1 \
   --accelerator-type=a100 \

From 6c0b94d8e6ba1e3d6c11afc14a7af9c1fcfc07b5 Mon Sep 17 00:00:00 2001
From: Curtis Anderson <curtis@mlcommons.org>
Date: Tue, 28 Apr 2026 11:16:57 -0700
Subject: [PATCH 2/4] Add unit test

---
 tests/unit/test_cli_new.py | 196 +++++++++++++++++++++++++++++++++++++
 1 file changed, 196 insertions(+)
 create mode 100644 tests/unit/test_cli_new.py

diff --git a/tests/unit/test_cli_new.py b/tests/unit/test_cli_new.py
new file mode 100644
index 00000000..765c94fb
--- /dev/null
+++ b/tests/unit/test_cli_new.py
@@ -0,0 +1,196 @@
+"""
+Comprehensive Tests for the MLPerf Storage CLI parser.
+Validates structural boundaries, subcommand availability, value constraints,
+and explicit verification that 'closed' defaults match 'open' defaults.
+"""
+
+import sys
+import pytest
+import argparse
+from unittest.mock import patch
+from mlpstorage_py.cli_parser import parse_arguments
+from mlpstorage_py.config import EXIT_CODE
+
+# =====================================================================
+# 1. Open vs. Closed Equivalence Tests
+# =====================================================================
+
+def test_kvcache_open_closed_defaults_match():
+    """
+    Verify that all hardcoded defaults in KVCache closed mode exactly
+    match the argparse defaults provided in open mode.
+    """
+    base_args = ['kvcache', 'run', '--file']
+    
+    with patch('sys.argv', ['mlpstorage', 'closed'] + base_args):
+        args_closed = parse_arguments()
+        
+    with patch('sys.argv', ['mlpstorage', 'open'] + base_args):
+        args_open = parse_arguments()
+
+    # Model and Users
+    assert args_closed.model == args_open.model
+    assert args_closed.num_users == args_open.num_users
+    
+    # Cache Tier Memory
+    assert args_closed.gpu_mem_gb == args_open.gpu_mem_gb == 16.0
+    assert args_closed.cpu_mem_gb == args_open.cpu_mem_gb == 32.0
+    
+    # Run configuration
+    assert args_closed.duration == args_open.duration
+    assert args_closed.generation_mode == args_open.generation_mode == 'realistic'
+    assert args_closed.performance_profile == args_open.performance_profile == 'latency'
+    
+    # Optional Features
+    assert args_closed.disable_multi_turn == args_open.disable_multi_turn == False
+    assert args_closed.enable_rag == args_open.enable_rag == True
+    assert args_closed.autoscaler_mode == args_open.autoscaler_mode == 'qos'
+    
+    # Universal Universal/Common arguments defaults
+    assert args_closed.loops == args_open.loops == 1
+
+
+def test_checkpointing_open_closed_defaults_match():
+    """
+    Verify that Checkpointing 'closed' hardcoded defaults (like read/write checkpoints)
+    match the 'open' default values.
+    """
+    # Note: Checkpointing requires some mandatory arguments to pass validation
+    base_args = ['checkpointing', 'run', '-cm', '1024', '-m', 'llama3.1-8b', '-np', '2', '-cf', '/tmp', '--file']
+    
+    with patch('sys.argv', ['mlpstorage', 'closed'] + base_args):
+        args_closed = parse_arguments()
+        
+    with patch('sys.argv', ['mlpstorage', 'open'] + base_args):
+        args_open = parse_arguments()
+
+    assert args_closed.num_checkpoints_read == args_open.num_checkpoints_read == 10
+    assert args_closed.num_checkpoints_write == args_open.num_checkpoints_write == 10
+
+
+def test_training_params_strictness():
+    """
+    Verify that 'params' behaves correctly across modes:
+    Closed mode forces it to an empty string. Open mode leaves it None if unpassed.
+    """
+    base_args = ['training', 'run', '-cm', '1024', '-m', 'unet3d', '-g', 'v100', '-na', '2', '--file']
+    
+    with patch('sys.argv', ['mlpstorage', 'closed'] + base_args):
+        args_closed = parse_arguments()
+        assert args_closed.params == '' # Closed mode strictness
+        
+    with patch('sys.argv', ['mlpstorage', 'open'] + base_args):
+        args_open = parse_arguments()
+        assert args_open.params is None # Open mode default (unspecified)
+
+
+# =====================================================================
+# 2. Structural & Subcommand Combinations (Positive Cases)
+# =====================================================================
+
+@pytest.mark.parametrize("cmd_list, expected_program, expected_command", [
+    # Training combinations
+    (['training', 'datasize', '-cm', '1024', '-m', 'unet3d', '-g', 'v100', '-ma', '4', '--file'], 'training', 'datasize'),
+    (['training', 'datagen', '-m', 'unet3d', '-np', '4', '--file'], 'training', 'datagen'),
+    (['training', 'configview', '-na', '4', '--file'], 'training', 'configview'),
+    
+    # Checkpointing combinations
+    (['checkpointing', 'datasize', '-cm', '1024', '-m', 'llama3.1-8b', '--file'], 'checkpointing', 'datasize'),
+    
+    # KVCache combinations
+    (['kvcache', 'datasize', '--file'], 'kvcache', 'datasize'),
+    
+    # Utilities
+    (['reports', 'reportgen', '--file'], 'reports', 'reportgen'),
+    (['history', 'show', '--file'], 'history', 'show'),
+    (['history', 'rerun', '123', '--file'], 'history', 'rerun'),
+    (['lockfile', 'verify', '--file'], 'lockfile', 'verify'),
+])
+def test_all_program_subcommand_combinations(cmd_list, expected_program, expected_command):
+    """
+    Parametrized test to ensure all major benchmarks and utility subcommands
+    can successfully parse their minimum required arguments in open mode.
+    """
+    test_args = ['mlpstorage', 'open'] + cmd_list
+    with patch('sys.argv', test_args):
+        args = parse_arguments()
+        assert args.open is True
+        assert args.program == expected_program
+        if expected_command:
+            # Handle utilities that map dest="command" or "lockfile_command"
+            cmd_val = getattr(args, 'command', getattr(args, 'lockfile_command', None))
+            assert cmd_val == expected_command
+
+
+# =====================================================================
+# 3. Protocol Parsing & Data Access Protocol Mapping
+# =====================================================================
+
+def test_data_access_protocol_consolidation_file():
+    """Test that --file is correctly consolidated into the data_access_protocol field."""
+    test_args = ['mlpstorage', 'open', 'kvcache', 'run', '--file']
+    with patch('sys.argv', test_args):
+        args = parse_arguments()
+        assert args.data_access_protocol == 'file'
+        assert not hasattr(args, 'file')   # Should be deleted
+        assert not hasattr(args, 'object') # Should be deleted
+
+def test_data_access_protocol_consolidation_object():
+    """Test that --object is correctly consolidated into the data_access_protocol field."""
+    # Training supports object storage. We use training datagen for a simple required args profile.
+    test_args = ['mlpstorage', 'open', 'training', 'datagen', '-m', 'unet3d', '-np', '4', '--object', 's3']
+    with patch('sys.argv', test_args):
+        args = parse_arguments()
+        assert args.data_access_protocol == 's3'
+
+
+# =====================================================================
+# 4. Negative Validation Tests (Value constraints and illegal inputs)
+# =====================================================================
+
+def test_kvcache_rejects_object_storage():
+    """KVCache custom validation should reject object storage outright."""
+    test_args = ['mlpstorage', 'open', 'kvcache', 'run', '--object', 's3']
+    with patch('sys.argv', test_args):
+        with pytest.raises(SystemExit) as exc_info:
+            parse_arguments()
+        assert exc_info.value.code == EXIT_CODE.INVALID_ARGUMENTS
+
+
+def test_checkpointing_rejects_negative_checkpoints():
+    """Checkpointing custom validation should reject negative checkpoint counts."""
+    test_args = [
+        'mlpstorage', 'open', 'checkpointing', 'run', 
+        '-cm', '1024', '-m', 'llama3.1-8b', '-np', '2', '-cf', '/tmp', '--file',
+        '--num-checkpoints-read', '-5'
+    ]
+    with patch('sys.argv', test_args):
+        with pytest.raises(SystemExit) as exc_info:
+            parse_arguments()
+        assert exc_info.value.code == EXIT_CODE.INVALID_ARGUMENTS
+
+
+def test_closed_mode_rejects_open_only_args():
+    """
+    Open-mode arguments (like --timeseries-interval or --allow-invalid-params)
+    should trigger an argparse unrecognized argument error if passed in closed mode.
+    """
+    test_args = [
+        'mlpstorage', 'closed', 'kvcache', 'run', '--file',
+        '--allow-invalid-params'  # Not added to parser in closed mode
+    ]
+    with patch('sys.argv', test_args):
+        with pytest.raises(SystemExit) as exc_info:
+            parse_arguments()
+        # argparse default exit code for unrecognized arguments is 2
+        assert exc_info.value.code != 0
+
+
+def test_missing_required_argument_triggers_exit():
+    """Omitting a required argument (like -m / --model for training) should fail."""
+    test_args = ['mlpstorage', 'open', 'training', 'run', '-cm', '1024', '-g', 'v100', '-na', '2', '--file']
+    # Missing '-m'
+    with patch('sys.argv', test_args):
+        with pytest.raises(SystemExit) as exc_info:
+            parse_arguments()
+        assert exc_info.value.code != 0

From 3ea89877cdf629eff83ed1fefc9d2302b4b8de02 Mon Sep 17 00:00:00 2001
From: Curtis Anderson <curtis@mlcommons.org>
Date: Tue, 28 Apr 2026 13:53:19 -0700
Subject: [PATCH 3/4] Ensure 'results_dir' is reqiured for runs

---
 mlpstorage_py/cli/checkpointing_args.py       |  2 +-
 mlpstorage_py/cli/common_args.py              | 36 +++++++++++--------
 mlpstorage_py/cli/kvcache_args.py             |  5 ++-
 mlpstorage_py/cli/lockfile_args.py            |  4 +--
 mlpstorage_py/cli/training_args.py            |  5 ++-
 mlpstorage_py/cli/utility_args.py             |  4 +--
 mlpstorage_py/cli/vectordb_args.py            |  5 ++-
 mlpstorage_py/cli_parser.py                   |  1 +
 tests/conftest.py                             | 10 +++---
 .../{test_cli_new.py => test_cli_parser.py}   |  0
 10 files changed, 45 insertions(+), 27 deletions(-)
 rename tests/unit/{test_cli_new.py => test_cli_parser.py} (100%)
 mode change 100644 => 100755

diff --git a/mlpstorage_py/cli/checkpointing_args.py b/mlpstorage_py/cli/checkpointing_args.py
index 600baff1..9fc264fc 100755
--- a/mlpstorage_py/cli/checkpointing_args.py
+++ b/mlpstorage_py/cli/checkpointing_args.py
@@ -111,7 +111,7 @@ def add_checkpointing_arguments(parser, is_closed):
         help=HELP_MESSAGES['checkpoint_folder']
     )
 
-    add_universal_arguments(_parser, True, is_closed)
+    add_universal_arguments(_parser, True, True, is_closed)
 
     # Add time-series arguments to run command only
     add_timeseries_arguments(run_benchmark, is_closed)
diff --git a/mlpstorage_py/cli/common_args.py b/mlpstorage_py/cli/common_args.py
index f44460c4..ca915e74 100755
--- a/mlpstorage_py/cli/common_args.py
+++ b/mlpstorage_py/cli/common_args.py
@@ -167,19 +167,28 @@
 }
 
 
-def add_universal_arguments(parser, offer_object, is_closed):
+def add_universal_arguments(parser, req_results, offer_object, is_closed):
     """Add arguments common to all benchmarks and commands.
 
     Args:
         parser: Argparse parser to add arguments to.
     """
     standard_args = parser.add_argument_group("Standard Arguments")
-    standard_args.add_argument(
-        '--results-dir', '-rd',
-        type=str,
-        default=DEFAULT_RESULTS_DIR,
-        help=HELP_MESSAGES['results_dir']
-    )
+    if req_results:
+        standard_args.add_argument(
+            '--results-dir', '-rd',
+            type=str,
+            required=True,
+            default=DEFAULT_RESULTS_DIR,
+            help=HELP_MESSAGES['results_dir']
+        )
+    else:
+        standard_args.add_argument(
+            '--results-dir', '-rd',
+            type=str,
+            default=DEFAULT_RESULTS_DIR,
+            help=HELP_MESSAGES['results_dir']
+        )
 
     if is_closed:
         parser.set_defaults(
@@ -200,13 +209,13 @@ def add_universal_arguments(parser, offer_object, is_closed):
     )
 
     # Create a mutually exclusive group for file/object options
+    access_proto = standard_args.add_mutually_exclusive_group(required=True)
+    access_proto.add_argument(
+        "--file",
+        action="store_true",
+        help="Use POSIX files as the data access method"
+    )
     if offer_object:
-        access_proto = standard_args.add_mutually_exclusive_group(required=True)
-        access_proto.add_argument(
-            "--file",
-            action="store_true",
-            help="Use POSIX files as the data access method"
-        )
         access_proto.add_argument(
             "--object",
             nargs="?",
@@ -217,7 +226,6 @@ def add_universal_arguments(parser, offer_object, is_closed):
         )
     else:
         parser.set_defaults(
-            file=True,
             object=False
         )
 
diff --git a/mlpstorage_py/cli/kvcache_args.py b/mlpstorage_py/cli/kvcache_args.py
index 7a7f2d38..07bcafdc 100755
--- a/mlpstorage_py/cli/kvcache_args.py
+++ b/mlpstorage_py/cli/kvcache_args.py
@@ -90,7 +90,10 @@ def add_kvcache_arguments(parser, is_closed):
     for _parser in [run_benchmark, datasize]:
         _add_kvcache_model_arguments(_parser, is_closed)
         _add_kvcache_cache_arguments(_parser, is_closed)
-        add_universal_arguments(_parser, False, is_closed)
+        if _parser == run_benchmark:
+            add_universal_arguments(_parser, True, False, is_closed)
+        else:
+            add_universal_arguments(_parser, False, False, is_closed)
 
     # Run-specific arguments
     _add_kvcache_run_arguments(run_benchmark, is_closed)
diff --git a/mlpstorage_py/cli/lockfile_args.py b/mlpstorage_py/cli/lockfile_args.py
index 5755a6b2..29851387 100755
--- a/mlpstorage_py/cli/lockfile_args.py
+++ b/mlpstorage_py/cli/lockfile_args.py
@@ -54,7 +54,7 @@ def add_lockfile_arguments(parser, is_closed):
         dest="generate_all",
         help="Generate both requirements.txt and requirements-full.txt",
     )
-    add_universal_arguments(generate_parser, True, is_closed)
+    add_universal_arguments(generate_parser, True, True, is_closed)
 
     # Verify subcommand
     verify_parser = subparsers.add_parser(
@@ -83,6 +83,6 @@ def add_lockfile_arguments(parser, is_closed):
         action="store_true",
         help="Fail on any difference (default: fail only on version mismatch)",
     )
-    add_universal_arguments(verify_parser, True, is_closed)
+    add_universal_arguments(verify_parser, True, True, is_closed)
 
     return parser
diff --git a/mlpstorage_py/cli/training_args.py b/mlpstorage_py/cli/training_args.py
index b289b19d..7b238db0 100755
--- a/mlpstorage_py/cli/training_args.py
+++ b/mlpstorage_py/cli/training_args.py
@@ -140,7 +140,10 @@ def add_training_arguments(parser, is_closed):
             help="Filesystem location for data"
         )
         add_dlio_arguments(_parser, is_closed)
-        add_universal_arguments(_parser, True, is_closed)
+        if _parser == run_benchmark:
+            add_universal_arguments(_parser, True, True, is_closed)
+        else:
+            add_universal_arguments(_parser, False, True, is_closed)
 
     # Add time-series arguments to run command only
     add_timeseries_arguments(run_benchmark, is_closed)
diff --git a/mlpstorage_py/cli/utility_args.py b/mlpstorage_py/cli/utility_args.py
index b55c87a2..889cd941 100755
--- a/mlpstorage_py/cli/utility_args.py
+++ b/mlpstorage_py/cli/utility_args.py
@@ -35,7 +35,7 @@ def add_reports_arguments(parser, is_closed):
         help=HELP_MESSAGES['output_dir']
     )
 
-    add_universal_arguments(reportgen, True, is_closed)
+    add_universal_arguments(reportgen, True, True, is_closed)
 
 
 def add_history_arguments(parser, is_closed):
@@ -77,4 +77,4 @@ def add_history_arguments(parser, is_closed):
     )
 
     for _parser in [history, rerun]:
-        add_universal_arguments(_parser, True, is_closed)
+        add_universal_arguments(_parser, True, True, is_closed)
diff --git a/mlpstorage_py/cli/vectordb_args.py b/mlpstorage_py/cli/vectordb_args.py
index 64ee5a46..271b2328 100755
--- a/mlpstorage_py/cli/vectordb_args.py
+++ b/mlpstorage_py/cli/vectordb_args.py
@@ -200,7 +200,10 @@ def add_vectordb_arguments(parser, is_closed):
  
     # Add universal arguments to all subcommands
     for _parser in [datasize, datagen, run_benchmark]:
-        add_universal_arguments(_parser, True, is_closed)
+        if _parser == run_benchmark:
+            add_universal_arguments(_parser, True, False, is_closed)
+        else:
+            add_universal_arguments(_parser, False, False, is_closed)
  
     # Add time-series arguments to run command only
     add_timeseries_arguments(run_benchmark, is_closed)
diff --git a/mlpstorage_py/cli_parser.py b/mlpstorage_py/cli_parser.py
index fae56443..5f6cccbf 100755
--- a/mlpstorage_py/cli_parser.py
+++ b/mlpstorage_py/cli_parser.py
@@ -57,6 +57,7 @@ def parse_arguments():
         print(f"Note: running in a 'closed' configuration, some options are limited or forced to default values")
     else:
         print(f"Note: running in an 'open' configuration, all options are available")
+    print(f"ARGV=", sys.argv)
 
     # Note: Added usage string to document the new positional argument
     parser = argparse.ArgumentParser(
diff --git a/tests/conftest.py b/tests/conftest.py
index 0e57dc75..dae44478 100755
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,5 +1,5 @@
 """
-Shared pytest fixtures for mlpstorage tests.
+Shared pytest fixtures for mlpstorage_py tests.
 
 These fixtures provide mock data, loggers, and test utilities that can be
 used across all test modules without requiring DLIO to be installed.
@@ -24,7 +24,7 @@
 
 import pytest
 
-from mlpstorage.config import BENCHMARK_TYPES, PARAM_VALIDATION
+from mlpstorage_py.config import BENCHMARK_TYPES, PARAM_VALIDATION
 
 # Import from fixtures package
 from tests.fixtures import (
@@ -293,7 +293,7 @@ def sample_checkpointing_parameters() -> Dict[str, Any]:
 @pytest.fixture
 def sample_cluster_info(mock_logger):
     """Create a sample ClusterInformation object."""
-    from mlpstorage.rules import ClusterInformation, HostInfo, HostMemoryInfo
+    from mlpstorage_py.rules import ClusterInformation, HostInfo, HostMemoryInfo
 
     host_info_list = [
         HostInfo(
@@ -313,7 +313,7 @@ def sample_cluster_info(mock_logger):
 @pytest.fixture
 def sample_benchmark_run_data(sample_training_parameters, sample_cluster_info):
     """Create a sample BenchmarkRunData for testing."""
-    from mlpstorage.rules import BenchmarkRunData
+    from mlpstorage_py.rules import BenchmarkRunData
 
     return BenchmarkRunData(
         benchmark_type=BENCHMARK_TYPES.training,
@@ -480,7 +480,7 @@ def mock_benchmark_instance(training_run_args, sample_training_parameters, sampl
 @pytest.fixture
 def clean_env(monkeypatch):
     """
-    Remove mlpstorage-related environment variables.
+    Remove mlpstorage_py-related environment variables.
 
     Usage:
         def test_check_env_default(clean_env):
diff --git a/tests/unit/test_cli_new.py b/tests/unit/test_cli_parser.py
old mode 100644
new mode 100755
similarity index 100%
rename from tests/unit/test_cli_new.py
rename to tests/unit/test_cli_parser.py

From c4a776cf2488071a351864dedfd8ec3cdabce146 Mon Sep 17 00:00:00 2001
From: Curtis Anderson <curtis@mlcommons.org>
Date: Tue, 28 Apr 2026 17:03:45 -0700
Subject: [PATCH 4/4] Add a unit test

---
 mlpstorage_py/cli/checkpointing_args.py |   5 +-
 mlpstorage_py/cli/common_args.py        |  46 ++--
 mlpstorage_py/cli/kvcache_args.py       | 104 ++++----
 mlpstorage_py/cli/lockfile_args.py      |   4 +-
 mlpstorage_py/cli/training_args.py      |   9 +-
 mlpstorage_py/cli/utility_args.py       |   4 +-
 mlpstorage_py/cli/vectordb_args.py      |   8 +-
 mlpstorage_py/config.py                 |   2 +-
 tests/unit/test_cli_parser.py           | 314 ++++++++++++------------
 9 files changed, 250 insertions(+), 246 deletions(-)

diff --git a/mlpstorage_py/cli/checkpointing_args.py b/mlpstorage_py/cli/checkpointing_args.py
index 9fc264fc..3b9efdd0 100755
--- a/mlpstorage_py/cli/checkpointing_args.py
+++ b/mlpstorage_py/cli/checkpointing_args.py
@@ -66,7 +66,7 @@ def add_checkpointing_arguments(parser, is_closed):
             )
 
         if is_closed:
-            parser.set_defaults(
+            _parser.set_defaults(
                 num_checkpoints_read=10,
                 num_checkpoints_write=10
             )
@@ -111,7 +111,8 @@ def add_checkpointing_arguments(parser, is_closed):
         help=HELP_MESSAGES['checkpoint_folder']
     )
 
-    add_universal_arguments(_parser, True, True, is_closed)
+    add_universal_arguments(run_benchmark, True, True, True, is_closed)
+    add_universal_arguments(datasize, False, False, True, is_closed)
 
     # Add time-series arguments to run command only
     add_timeseries_arguments(run_benchmark, is_closed)
diff --git a/mlpstorage_py/cli/common_args.py b/mlpstorage_py/cli/common_args.py
index ca915e74..2ecbd00d 100755
--- a/mlpstorage_py/cli/common_args.py
+++ b/mlpstorage_py/cli/common_args.py
@@ -167,7 +167,7 @@
 }
 
 
-def add_universal_arguments(parser, req_results, offer_object, is_closed):
+def add_universal_arguments(parser, req_fileobj, req_results, offer_object, is_closed):
     """Add arguments common to all benchmarks and commands.
 
     Args:
@@ -191,7 +191,7 @@ def add_universal_arguments(parser, req_results, offer_object, is_closed):
         )
 
     if is_closed:
-        parser.set_defaults(
+        standard_args.set_defaults(
             loops=1
         )
     else:
@@ -208,24 +208,30 @@ def add_universal_arguments(parser, req_results, offer_object, is_closed):
         help="Path to YAML file with argument overrides"
     )
 
-    # Create a mutually exclusive group for file/object options
-    access_proto = standard_args.add_mutually_exclusive_group(required=True)
-    access_proto.add_argument(
-        "--file",
-        action="store_true",
-        help="Use POSIX files as the data access method"
-    )
-    if offer_object:
+    if req_fileobj:
+        # Create a mutually exclusive group for file/object options
+        access_proto = standard_args.add_mutually_exclusive_group(required=True)
         access_proto.add_argument(
-            "--object",
-            nargs="?",
-            type=str,
-            const="s3",
-            choices=["s3"],
-            help="Use the given Object API as the data access method, defaults to S3"
+            "--file",
+            action="store_true",
+            help="Use POSIX files as the data access method"
         )
+        if offer_object:
+            access_proto.add_argument(
+                "--object",
+                nargs="?",
+                type=str,
+                const="s3",
+                choices=["s3"],
+                help="Use the given Object API as the data access method, defaults to S3"
+            )
+        else:
+            access_proto.set_defaults(
+                object=False
+            )
     else:
-        parser.set_defaults(
+        standard_args.set_defaults(
+            file=True,
             object=False
         )
 
@@ -246,7 +252,11 @@ def add_universal_arguments(parser, req_results, offer_object, is_closed):
         default="INFO"
     )
 
-    if not is_closed:
+    if is_closed:
+        output_control.set_defaults(
+            allow_invalid_params=False
+        )
+    else:
         output_control.add_argument(
             "--allow-invalid-params", "-aip",
             action="store_true",
diff --git a/mlpstorage_py/cli/kvcache_args.py b/mlpstorage_py/cli/kvcache_args.py
index 07bcafdc..2f99d57c 100755
--- a/mlpstorage_py/cli/kvcache_args.py
+++ b/mlpstorage_py/cli/kvcache_args.py
@@ -90,10 +90,9 @@ def add_kvcache_arguments(parser, is_closed):
     for _parser in [run_benchmark, datasize]:
         _add_kvcache_model_arguments(_parser, is_closed)
         _add_kvcache_cache_arguments(_parser, is_closed)
-        if _parser == run_benchmark:
-            add_universal_arguments(_parser, True, False, is_closed)
-        else:
-            add_universal_arguments(_parser, False, False, is_closed)
+
+    add_universal_arguments(run_benchmark, False, True, False, is_closed)
+    add_universal_arguments(datasize, False, False, False, is_closed)
 
     # Run-specific arguments
     _add_kvcache_run_arguments(run_benchmark, is_closed)
@@ -110,26 +109,19 @@ def _add_kvcache_model_arguments(parser, is_closed):
     Args:
         parser: Argparse parser to add arguments to.
     """
-    if is_closed:
-        # Hardcode the default values into the namespace without presenting CLI options
-        parser.set_defaults(
-            model=KVCACHE_MODEL_DEFAULT,
-            num_users=100
-        )
-    else:
-        model_group = parser.add_argument_group("Model Configuration")
-        model_group.add_argument(
-            '--model', '-m',
-            choices=KVCACHE_MODELS,
-            default='llama3.1-8b',
-            help=KVCACHE_HELP_MESSAGES['kvcache_model']
-        )
-        model_group.add_argument(
-            '--num-users', '-nu',
-            type=int,
-            default=100,
-            help=KVCACHE_HELP_MESSAGES['num_users']
-        )
+    model_group = parser.add_argument_group("Model Configuration")
+    model_group.add_argument(
+        '--model', '-m',
+        choices=KVCACHE_MODELS,
+        required=True,
+        help=KVCACHE_HELP_MESSAGES['kvcache_model']
+    )
+    model_group.add_argument(
+        '--num-users', '-nu',
+        type=int,
+        required=True,
+        help=KVCACHE_HELP_MESSAGES['num_users']
+    )
 
 
 def _add_kvcache_cache_arguments(parser, is_closed):
@@ -225,38 +217,38 @@ def _add_kvcache_optional_features(parser, is_closed):
                 autoscaler_mode='qos'
         )
     else:
-            features_group.add_argument(
-                '--disable-multi-turn',
-                action='store_true',
-                help=KVCACHE_HELP_MESSAGES['disable_multi_turn']
-            )
-            features_group.add_argument(
-                '--disable-prefix-caching',
-                action='store_true',
-                help=KVCACHE_HELP_MESSAGES['disable_prefix_caching']
-            )
-            features_group.add_argument(
-                '--enable-rag',
-                action='store_true',
-                help=KVCACHE_HELP_MESSAGES['enable_rag']
-            )
-            features_group.add_argument(
-                '--rag-num-docs',
-                type=int,
-                default=10,
-                help=KVCACHE_HELP_MESSAGES['rag_num_docs']
-            )
-            features_group.add_argument(
-                '--enable-autoscaling',
-                action='store_true',
-                help=KVCACHE_HELP_MESSAGES['enable_autoscaling']
-            )
-            features_group.add_argument(
-                '--autoscaler-mode',
-                choices=['qos', 'predictive'],
-                default='qos',
-                help=KVCACHE_HELP_MESSAGES['autoscaler_mode']
-            )
+        features_group.add_argument(
+            '--disable-multi-turn',
+            action='store_true',
+            help=KVCACHE_HELP_MESSAGES['disable_multi_turn']
+        )
+        features_group.add_argument(
+            '--disable-prefix-caching',
+            action='store_true',
+            help=KVCACHE_HELP_MESSAGES['disable_prefix_caching']
+        )
+        features_group.add_argument(
+            '--enable-rag',
+            action='store_true',
+            help=KVCACHE_HELP_MESSAGES['enable_rag']
+        )
+        features_group.add_argument(
+            '--rag-num-docs',
+            type=int,
+            default=10,
+            help=KVCACHE_HELP_MESSAGES['rag_num_docs']
+        )
+        features_group.add_argument(
+            '--enable-autoscaling',
+            action='store_true',
+            help=KVCACHE_HELP_MESSAGES['enable_autoscaling']
+        )
+        features_group.add_argument(
+            '--autoscaler-mode',
+            choices=['qos', 'predictive'],
+            default='qos',
+            help=KVCACHE_HELP_MESSAGES['autoscaler_mode']
+        )
 
 
 def _add_kvcache_distributed_arguments(parser, is_closed):
diff --git a/mlpstorage_py/cli/lockfile_args.py b/mlpstorage_py/cli/lockfile_args.py
index 29851387..8b085044 100755
--- a/mlpstorage_py/cli/lockfile_args.py
+++ b/mlpstorage_py/cli/lockfile_args.py
@@ -54,7 +54,7 @@ def add_lockfile_arguments(parser, is_closed):
         dest="generate_all",
         help="Generate both requirements.txt and requirements-full.txt",
     )
-    add_universal_arguments(generate_parser, True, True, is_closed)
+    add_universal_arguments(generate_parser, True, True, True, is_closed)
 
     # Verify subcommand
     verify_parser = subparsers.add_parser(
@@ -83,6 +83,6 @@ def add_lockfile_arguments(parser, is_closed):
         action="store_true",
         help="Fail on any difference (default: fail only on version mismatch)",
     )
-    add_universal_arguments(verify_parser, True, True, is_closed)
+    add_universal_arguments(verify_parser, True, True, True, is_closed)
 
     return parser
diff --git a/mlpstorage_py/cli/training_args.py b/mlpstorage_py/cli/training_args.py
index 7b238db0..389762b0 100755
--- a/mlpstorage_py/cli/training_args.py
+++ b/mlpstorage_py/cli/training_args.py
@@ -140,10 +140,11 @@ def add_training_arguments(parser, is_closed):
             help="Filesystem location for data"
         )
         add_dlio_arguments(_parser, is_closed)
-        if _parser == run_benchmark:
-            add_universal_arguments(_parser, True, True, is_closed)
-        else:
-            add_universal_arguments(_parser, False, True, is_closed)
+
+    add_universal_arguments(datasize, False, False, False, is_closed)
+    add_universal_arguments(datagen, True, True, True, is_closed)
+    add_universal_arguments(run_benchmark, True, True, True, is_closed)
+    add_universal_arguments(configview, False, True, True, is_closed)
 
     # Add time-series arguments to run command only
     add_timeseries_arguments(run_benchmark, is_closed)
diff --git a/mlpstorage_py/cli/utility_args.py b/mlpstorage_py/cli/utility_args.py
index 889cd941..b2a5455b 100755
--- a/mlpstorage_py/cli/utility_args.py
+++ b/mlpstorage_py/cli/utility_args.py
@@ -35,7 +35,7 @@ def add_reports_arguments(parser, is_closed):
         help=HELP_MESSAGES['output_dir']
     )
 
-    add_universal_arguments(reportgen, True, True, is_closed)
+    add_universal_arguments(reportgen, True, True, True, is_closed)
 
 
 def add_history_arguments(parser, is_closed):
@@ -77,4 +77,4 @@ def add_history_arguments(parser, is_closed):
     )
 
     for _parser in [history, rerun]:
-        add_universal_arguments(_parser, True, True, is_closed)
+        add_universal_arguments(_parser, True, True, True, is_closed)
diff --git a/mlpstorage_py/cli/vectordb_args.py b/mlpstorage_py/cli/vectordb_args.py
index 271b2328..35c64fb1 100755
--- a/mlpstorage_py/cli/vectordb_args.py
+++ b/mlpstorage_py/cli/vectordb_args.py
@@ -199,11 +199,9 @@ def add_vectordb_arguments(parser, is_closed):
     )
  
     # Add universal arguments to all subcommands
-    for _parser in [datasize, datagen, run_benchmark]:
-        if _parser == run_benchmark:
-            add_universal_arguments(_parser, True, False, is_closed)
-        else:
-            add_universal_arguments(_parser, False, False, is_closed)
+    add_universal_arguments(datasize, False, False, False, is_closed)
+    add_universal_arguments(datagen, True, True, True, is_closed)
+    add_universal_arguments(run_benchmark, True, True, True, is_closed)
  
     # Add time-series arguments to run command only
     add_timeseries_arguments(run_benchmark, is_closed)
diff --git a/mlpstorage_py/config.py b/mlpstorage_py/config.py
index 6ea5f420..900bab96 100755
--- a/mlpstorage_py/config.py
+++ b/mlpstorage_py/config.py
@@ -101,7 +101,7 @@ def get_datetime_string():
     'tiny-1b',
     'mistral-7b',
     'llama2-7b',
-    KVCACHE_MODEL_DEFAULT,
+    'llama3.1-8b',
     'llama3.1-70b-instruct',
 ]
 
diff --git a/tests/unit/test_cli_parser.py b/tests/unit/test_cli_parser.py
index 765c94fb..30fd3195 100755
--- a/tests/unit/test_cli_parser.py
+++ b/tests/unit/test_cli_parser.py
@@ -1,196 +1,198 @@
 """
 Comprehensive Tests for the MLPerf Storage CLI parser.
 Validates structural boundaries, subcommand availability, value constraints,
-and explicit verification that 'closed' defaults match 'open' defaults.
+YAML overrides, post-parse argument updates, and 'closed' vs 'open' parity.
 """
 
 import sys
 import pytest
 import argparse
-from unittest.mock import patch
-from mlpstorage_py.cli_parser import parse_arguments
+from unittest.mock import patch, mock_open
+from mlpstorage_py.cli_parser import parse_arguments, update_args, apply_yaml_config_overrides
 from mlpstorage_py.config import EXIT_CODE
 
 # =====================================================================
-# 1. Open vs. Closed Equivalence Tests
+# 1. Open vs. Closed Equivalence & Constraints Tests
 # =====================================================================
 
-def test_kvcache_open_closed_defaults_match():
-    """
-    Verify that all hardcoded defaults in KVCache closed mode exactly
-    match the argparse defaults provided in open mode.
-    """
-    base_args = ['kvcache', 'run', '--file']
+class TestOpenClosedEquivalence:
     
-    with patch('sys.argv', ['mlpstorage', 'closed'] + base_args):
-        args_closed = parse_arguments()
+    def test_kvcache_open_closed_defaults_match(self):
+        """Verify hardcoded defaults in KVCache closed mode exactly match argparse open mode defaults."""
+        base_args = ['kvcache', 'run', '-rd', '/tmp', '-m', 'llama3.1-8b', '-nu', '100']
         
-    with patch('sys.argv', ['mlpstorage', 'open'] + base_args):
-        args_open = parse_arguments()
-
-    # Model and Users
-    assert args_closed.model == args_open.model
-    assert args_closed.num_users == args_open.num_users
-    
-    # Cache Tier Memory
-    assert args_closed.gpu_mem_gb == args_open.gpu_mem_gb == 16.0
-    assert args_closed.cpu_mem_gb == args_open.cpu_mem_gb == 32.0
-    
-    # Run configuration
-    assert args_closed.duration == args_open.duration
-    assert args_closed.generation_mode == args_open.generation_mode == 'realistic'
-    assert args_closed.performance_profile == args_open.performance_profile == 'latency'
-    
-    # Optional Features
-    assert args_closed.disable_multi_turn == args_open.disable_multi_turn == False
-    assert args_closed.enable_rag == args_open.enable_rag == True
-    assert args_closed.autoscaler_mode == args_open.autoscaler_mode == 'qos'
-    
-    # Universal Universal/Common arguments defaults
-    assert args_closed.loops == args_open.loops == 1
-
-
-def test_checkpointing_open_closed_defaults_match():
-    """
-    Verify that Checkpointing 'closed' hardcoded defaults (like read/write checkpoints)
-    match the 'open' default values.
-    """
-    # Note: Checkpointing requires some mandatory arguments to pass validation
-    base_args = ['checkpointing', 'run', '-cm', '1024', '-m', 'llama3.1-8b', '-np', '2', '-cf', '/tmp', '--file']
-    
-    with patch('sys.argv', ['mlpstorage', 'closed'] + base_args):
-        args_closed = parse_arguments()
+        with patch('sys.argv', ['mlpstorage', 'closed'] + base_args):
+            args_closed = parse_arguments()
+            
+        with patch('sys.argv', ['mlpstorage', 'open'] + base_args):
+            args_open = parse_arguments()
+
+        # Check attributes forced by closed mode
+        assert args_closed.model == args_open.model == 'llama3.1-8b'
+        assert args_closed.gpu_mem_gb == args_open.gpu_mem_gb == 16.0
+        assert args_closed.duration == args_open.duration == 60
+        assert args_closed.loops == args_open.loops == 1
+        assert args_closed.disable_multi_turn == args_open.disable_multi_turn == False
+
+    def test_checkpointing_open_closed_defaults_match(self):
+        """Verify Checkpointing 'closed' forces read/write checkpoint counts to match 'open' defaults."""
+        base_args = ['checkpointing', 'run', '-cm', '1024', '-m', 'llama3-8b', '-np', '2', '-cf', '/tmp/ckpt', '-rd', '/tmp', '--file']
         
-    with patch('sys.argv', ['mlpstorage', 'open'] + base_args):
-        args_open = parse_arguments()
-
-    assert args_closed.num_checkpoints_read == args_open.num_checkpoints_read == 10
-    assert args_closed.num_checkpoints_write == args_open.num_checkpoints_write == 10
+        with patch('sys.argv', ['mlpstorage', 'closed'] + base_args):
+            args_closed = parse_arguments()
+            
+        with patch('sys.argv', ['mlpstorage', 'open'] + base_args):
+            args_open = parse_arguments()
 
+        assert args_closed.num_checkpoints_read == args_open.num_checkpoints_read == 10
+        assert args_closed.num_checkpoints_write == args_open.num_checkpoints_write == 10
 
-def test_training_params_strictness():
-    """
-    Verify that 'params' behaves correctly across modes:
-    Closed mode forces it to an empty string. Open mode leaves it None if unpassed.
-    """
-    base_args = ['training', 'run', '-cm', '1024', '-m', 'unet3d', '-g', 'v100', '-na', '2', '--file']
-    
-    with patch('sys.argv', ['mlpstorage', 'closed'] + base_args):
-        args_closed = parse_arguments()
-        assert args_closed.params == '' # Closed mode strictness
-        
-    with patch('sys.argv', ['mlpstorage', 'open'] + base_args):
-        args_open = parse_arguments()
-        assert args_open.params is None # Open mode default (unspecified)
+    def test_closed_mode_strips_open_args(self):
+        """Open-mode arguments should trigger an unrecognized argument error if passed in closed mode."""
+        test_args = ['mlpstorage', 'closed', 'kvcache', 'run', '-rd', '/tmp', '--allow-invalid-params']
+        with patch('sys.argv', test_args):
+            with pytest.raises(SystemExit) as exc_info:
+                parse_arguments()
+            assert exc_info.value.code != 0
 
 
 # =====================================================================
 # 2. Structural & Subcommand Combinations (Positive Cases)
 # =====================================================================
 
-@pytest.mark.parametrize("cmd_list, expected_program, expected_command", [
-    # Training combinations
-    (['training', 'datasize', '-cm', '1024', '-m', 'unet3d', '-g', 'v100', '-ma', '4', '--file'], 'training', 'datasize'),
-    (['training', 'datagen', '-m', 'unet3d', '-np', '4', '--file'], 'training', 'datagen'),
-    (['training', 'configview', '-na', '4', '--file'], 'training', 'configview'),
-    
-    # Checkpointing combinations
-    (['checkpointing', 'datasize', '-cm', '1024', '-m', 'llama3.1-8b', '--file'], 'checkpointing', 'datasize'),
-    
-    # KVCache combinations
-    (['kvcache', 'datasize', '--file'], 'kvcache', 'datasize'),
-    
-    # Utilities
-    (['reports', 'reportgen', '--file'], 'reports', 'reportgen'),
-    (['history', 'show', '--file'], 'history', 'show'),
-    (['history', 'rerun', '123', '--file'], 'history', 'rerun'),
-    (['lockfile', 'verify', '--file'], 'lockfile', 'verify'),
-])
-def test_all_program_subcommand_combinations(cmd_list, expected_program, expected_command):
-    """
-    Parametrized test to ensure all major benchmarks and utility subcommands
-    can successfully parse their minimum required arguments in open mode.
-    """
-    test_args = ['mlpstorage', 'open'] + cmd_list
-    with patch('sys.argv', test_args):
-        args = parse_arguments()
-        assert args.open is True
-        assert args.program == expected_program
-        if expected_command:
-            # Handle utilities that map dest="command" or "lockfile_command"
+class TestCLIStructureAndCombinations:
+
+    @pytest.mark.parametrize("test_name, cmd_list, expected_program, expected_command", [
+        # Training (Run requires -rd)
+        ("01", ['training', 'run', '-cm', '1024', '-m', 'retinanet', '-g', 'b200', '-na', '4', '-rd', '/tmp', '--file'], 'training', 'run'),
+        ("02", ['training', 'datasize', '-cm', '1024', '-m', 'flux', '-g', 'b200', '-ma', '4'], 'training', 'datasize'),
+        ("03", ['training', 'datagen', '-m', 'dlrm', '-np', '4', '--file',  '-rd', '/tmp'], 'training', 'datagen'),
+        ("04", ['training', 'configview', '-na', '4',  '-rd', '/tmp'], 'training', 'configview'),
+        
+        # Checkpointing (Note: datasize lacks universal args due to how it's built in checkpointing_args.py)
+        ("05", ['checkpointing', 'run', '-cm', '1024', '-m', 'llama3-8b', '-np', '4', '-cf', '/tmp/ckpt', '-rd', '/tmp', '--file'], 'checkpointing', 'run'),
+        ("06", ['checkpointing', 'datasize', '-cm', '1024', '-m', 'llama3-8b'], 'checkpointing', 'datasize'),
+        
+        # KVCache
+        ("07", ['kvcache', 'run', '-rd', '/tmp', '-m', 'llama3.1-8b', '-nu', '100'], 'kvcache', 'run'),
+        ("08", ['kvcache', 'datasize', '-m', 'llama3.1-8b', '-nu', '100'], 'kvcache', 'datasize'),
+        
+        # VectorDB
+        ("09", ['vectordb', 'run', '-rd', '/tmp', '--file'], 'vectordb', 'run'),
+        ("10", ['vectordb', 'datagen', '--file',  '-rd', '/tmp'], 'vectordb', 'datagen'),
+        ("11", ['vectordb', 'datasize'], 'vectordb', 'datasize'),
+        
+        # Utilities (All require -rd)
+        ("12", ['reports', 'reportgen', '-rd', '/tmp', '--file'], 'reports', 'reportgen'),
+        ("13", ['history', 'show', '-rd', '/tmp', '--file'], 'history', 'show'),
+        ("14", ['lockfile', 'generate', '-rd', '/tmp', '--file'], 'lockfile', 'generate'),
+        ("15", ['lockfile', 'verify', '-rd', '/tmp', '--file'], 'lockfile', 'verify'),
+    ])
+    def test_all_program_subcommand_combinations(self, test_name, cmd_list, expected_program, expected_command):
+        """Ensure all benchmarks and subcommands can parse their minimum required arguments."""
+        test_args = ['mlpstorage', 'closed'] + cmd_list
+        with patch('sys.argv', test_args):
+            args = parse_arguments()
+            assert test_name != None and args.closed is True
+            assert test_name != None and args.program == expected_program
             cmd_val = getattr(args, 'command', getattr(args, 'lockfile_command', None))
-            assert cmd_val == expected_command
+            assert test_name != None and cmd_val == expected_command
+
+    def test_missing_required_results_dir(self):
+        """Omitting -rd when req_results=True (e.g., training run) should fail."""
+        test_args = ['mlpstorage', 'closed', 'training', 'run', '-cm', '1024', '-m', 'flux', '-g', 'b200', '-na', '4', '--file']
+        with patch('sys.argv', test_args):
+            with pytest.raises(SystemExit) as exc_info:
+                parse_arguments()
+            assert exc_info.value.code != 0
+
+    def test_data_access_protocol_consolidation(self):
+        """Test that --file/--object are correctly popped and consolidated into data_access_protocol."""
+        test_args = ['mlpstorage', 'closed', 'training', 'datagen', '-m', 'dlrm', '-np', '4', '--object', 's3', '-rd', '/tmp']
+        with patch('sys.argv', test_args):
+            args = parse_arguments()
+            assert args.data_access_protocol == 's3'
+            assert not hasattr(args, 'file')
+            assert not hasattr(args, 'object')
 
 
 # =====================================================================
-# 3. Protocol Parsing & Data Access Protocol Mapping
+# 3. Validation Rules
 # =====================================================================
 
-def test_data_access_protocol_consolidation_file():
-    """Test that --file is correctly consolidated into the data_access_protocol field."""
-    test_args = ['mlpstorage', 'open', 'kvcache', 'run', '--file']
-    with patch('sys.argv', test_args):
-        args = parse_arguments()
-        assert args.data_access_protocol == 'file'
-        assert not hasattr(args, 'file')   # Should be deleted
-        assert not hasattr(args, 'object') # Should be deleted
-
-def test_data_access_protocol_consolidation_object():
-    """Test that --object is correctly consolidated into the data_access_protocol field."""
-    # Training supports object storage. We use training datagen for a simple required args profile.
-    test_args = ['mlpstorage', 'open', 'training', 'datagen', '-m', 'unet3d', '-np', '4', '--object', 's3']
-    with patch('sys.argv', test_args):
-        args = parse_arguments()
-        assert args.data_access_protocol == 's3'
+class TestCustomValidation:
+
+    def test_kvcache_rejects_object_storage(self):
+        """KVCache validate_args should reject object storage."""
+        test_args = ['mlpstorage', 'closed', 'kvcache', 'run', '-rd', '/tmp', '--object', 's3']
+        with patch('sys.argv', test_args):
+            with pytest.raises(SystemExit) as exc_info:
+                parse_arguments()
+            assert exc_info.value.code == EXIT_CODE.INVALID_ARGUMENTS
+
+    def test_checkpointing_rejects_negative_checkpoints(self):
+        """Checkpointing validate_args should reject negative checkpoint counts."""
+        test_args = [
+            'mlpstorage', 'closed', 'checkpointing', 'run', 
+            '-cm', '1024', '-m', 'llama3-8b', '-np', '2', '-cf', '/tmp/ckpt', '-rd', '/tmp', '--file',
+            '--num-checkpoints-read', '-5'
+        ]
+        with patch('sys.argv', test_args):
+            with pytest.raises(SystemExit) as exc_info:
+                parse_arguments()
+            assert exc_info.value.code == EXIT_CODE.INVALID_ARGUMENTS
 
 
 # =====================================================================
-# 4. Negative Validation Tests (Value constraints and illegal inputs)
+# 4. Post-Parse Configuration (update_args & YAML)
 # =====================================================================
 
-def test_kvcache_rejects_object_storage():
-    """KVCache custom validation should reject object storage outright."""
-    test_args = ['mlpstorage', 'open', 'kvcache', 'run', '--object', 's3']
-    with patch('sys.argv', test_args):
-        with pytest.raises(SystemExit) as exc_info:
-            parse_arguments()
-        assert exc_info.value.code == EXIT_CODE.INVALID_ARGUMENTS
+class TestUpdateArgsAndConfig:
 
+    def test_update_args_normalizes_hosts(self):
+        """update_args should handle messy host strings and normalize them to a clean list."""
+        args = argparse.Namespace(hosts="host1, host2   host3,host4")
+        update_args(args)
+        assert args.hosts == ['host1', 'host2', 'host3', 'host4']
+        assert args.num_client_hosts == 4
 
-def test_checkpointing_rejects_negative_checkpoints():
-    """Checkpointing custom validation should reject negative checkpoint counts."""
-    test_args = [
-        'mlpstorage', 'open', 'checkpointing', 'run', 
-        '-cm', '1024', '-m', 'llama3.1-8b', '-np', '2', '-cf', '/tmp', '--file',
-        '--num-checkpoints-read', '-5'
-    ]
-    with patch('sys.argv', test_args):
+    def test_update_args_empty_hosts_fails(self):
+        """update_args should exit if host normalization results in an empty list."""
+        args = argparse.Namespace(hosts=" , , ")
         with pytest.raises(SystemExit) as exc_info:
-            parse_arguments()
+            update_args(args)
         assert exc_info.value.code == EXIT_CODE.INVALID_ARGUMENTS
 
-
-def test_closed_mode_rejects_open_only_args():
-    """
-    Open-mode arguments (like --timeseries-interval or --allow-invalid-params)
-    should trigger an argparse unrecognized argument error if passed in closed mode.
-    """
-    test_args = [
-        'mlpstorage', 'closed', 'kvcache', 'run', '--file',
-        '--allow-invalid-params'  # Not added to parser in closed mode
-    ]
-    with patch('sys.argv', test_args):
-        with pytest.raises(SystemExit) as exc_info:
-            parse_arguments()
-        # argparse default exit code for unrecognized arguments is 2
-        assert exc_info.value.code != 0
-
-
-def test_missing_required_argument_triggers_exit():
-    """Omitting a required argument (like -m / --model for training) should fail."""
-    test_args = ['mlpstorage', 'open', 'training', 'run', '-cm', '1024', '-g', 'v100', '-na', '2', '--file']
-    # Missing '-m'
-    with patch('sys.argv', test_args):
-        with pytest.raises(SystemExit) as exc_info:
-            parse_arguments()
-        assert exc_info.value.code != 0
+    def test_update_args_process_nomenclature_mapping(self):
+        """update_args should unify 'num_accelerators' or 'max_accelerators' into 'num_processes'."""
+        args = argparse.Namespace(num_accelerators=8)
+        update_args(args)
+        assert args.num_processes == 8
+
+    def test_update_args_flattens_params(self):
+        """update_args should flatten lists of lists for params/mpi_params resulting from multiple append actions."""
+        args = argparse.Namespace(params=[['key=val1'], ['key=val2']])
+        update_args(args)
+        assert args.params == ['key=val1', 'key=val2']
+
+    def test_yaml_config_overrides(self):
+        """apply_yaml_config_overrides should update namespace attributes safely."""
+        mock_yaml_content = """
+        duration: 999
+        hosts: "node1,node2"
+        params:
+            batch_size: 32
+        """
+        # Create a namespace simulating a parsed output
+        initial_args = argparse.Namespace(
+            config_file="dummy.yaml",
+            duration=100,
+            hosts=['default_host'],
+            params=None
+        )
+        
+        with patch("builtins.open", mock_open(read_data=mock_yaml_content)):
+            updated_args = apply_yaml_config_overrides(initial_args)
+            
+        assert updated_args.duration == 999
+        assert updated_args.hosts == ['node1', 'node2'] # Special yaml handling for hosts