From 61e53eec9c525484b7a6e7745f6e03761e7b1007 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 19 May 2026 05:20:32 +0000 Subject: [PATCH] Cache get_config by model_name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit get_config() opens and parses configs/model/{model_name}.json on every call with no caching. calculate_sizes() (memory_model.py:797) calls get_config() unconditionally as its first line, and is itself called ~20-30 times per scheduler iteration (per layer, once directly from _emit_layer and once indirectly via XPURooflineModel._traffic_bytes). For a 129-iter --analytical-modeling baseline on Llama-3.1-8B, this showed up in the pyinstrument profile as ~100 ms across the calculate_sizes / _resolve_layer_latency call sites — same pattern as the _load_architecture (PR #36) and inline-chakra (PR #37) fixes. Adds a small module-level _config_cache mirroring _arch_cache / _perf_db_cache in trace_generator.py. Wall-clock on 129-iter baseline (5 runs, mean ± stdev): before: 1.57 s ± 0.06 after: 1.36 s ± 0.05 (-210 ms, -13%) Sim-time output (Total clocks, Mean TTFT/TPOT/ITL) unchanged. --- serving/core/utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/serving/core/utils.py b/serving/core/utils.py index 218dc4eb..5d9e0cc4 100644 --- a/serving/core/utils.py +++ b/serving/core/utils.py @@ -3,6 +3,11 @@ import json +# key: model_name +# value: parsed configs/model/{model_name}.json dict +_config_cache = {} + + # Formatting string for a trace file's per-layer row. Kept in this # module because trace writers live across the codebase and import it # as the canonical row template. 
@@ -56,6 +61,8 @@ def formatter(layername, comp_time, input_loc, input_size, weight_loc, weight_si def get_config(model_name): + if model_name in _config_cache: + return _config_cache[model_name] base_dir = os.path.dirname(os.path.abspath(__file__)) serving_dir = os.path.dirname(base_dir) repo_root = os.path.dirname(serving_dir) @@ -79,6 +86,7 @@ def get_config(model_name): f"{', '.join(candidate_paths)}. Please add the corresponding config file." ) + _config_cache[model_name] = config return config