diff --git a/serving/spec_compression_stress.sh b/serving/spec_compression_stress.sh index cdf2d667..0d93f332 100755 --- a/serving/spec_compression_stress.sh +++ b/serving/spec_compression_stress.sh @@ -199,6 +199,7 @@ echo "Workload: $NUM_REQ requests, prompt=$PROMPT_LEN, decode=$OUTPUT_LEN, t=0 echo "H100: ${H100_SCALE}x (nominal: tflops=989 mem_bw=3350)" echo "Spec: draft=$DRAFT_TOKEN_COUNT alpha=$ACCEPTANCE_RATE max_pending=$MAX_PENDING_TOKENS max_verify_tokens=$MAX_VERIFY_TOKENS" echo "Compress: ratio=$KV_COMPRESSION_RATIO period=$KV_COMPRESSION_PERIOD" +echo "Scheduler: prefix-caching=off chunked-prefill=off" echo "Log level: $LOG_LEVEL" echo "" @@ -230,6 +231,8 @@ run_mode() { --output "$csv" --num-req "$NUM_REQ" --max-num-batched-tokens "$MAX_NUM_BATCHED_TOKENS" + --no-enable-prefix-caching + --no-enable-chunked-prefill --log-level "$LOG_LEVEL" "${extra_args[@]}")