From 221895b806f1312b1ba83194b5f6e2541e0b2803 Mon Sep 17 00:00:00 2001 From: Fsocietyhhh <1211904451@qq.com> Date: Thu, 28 May 2026 23:13:34 -0700 Subject: [PATCH] fix(image): override SolanaLLMClient default timeout for slow image models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SDK default (`blockrun_llm.solana_client.DEFAULT_TIMEOUT = 60.0`) is too short for slow image models on the synchronous Solana gateway path. On `openai/gpt-image-2` with non-trivial prompts the upstream takes 90-180s end-to-end; the sidecar throws `httpx.ReadTimeout` mid-call, returns HTTP 500 to clients, and LiteLLM Proxy then retries 3-4 times — each attempt pays the gateway (which still successfully settles in the background per the optimistic-settle fix on blockrun-sol) yet the user sees only failures and burns ~$0.25 per request without an image. Reproduced end-to-end against sol.blockrun.ai with the user-reported failure case (gpt-image-2 + complex Chinese prompt, 1024x1024) via the full LiteLLM Proxy → blockrun-litellm sidecar → blockrun-llm SDK chain. With the default 60s timeout: 4 retries, 240s wall-clock, only HTTP 500 responses, no image. With this fix: HTTP 200 in 104s, real image URL returned. Changes: - `get_image_client` passes an explicit `timeout=_SOLANA_IMAGE_TIMEOUT_S` (default 300s, overridable via env var `BLOCKRUN_SOLANA_IMAGE_TIMEOUT`, which is read once at module import). - Bump version to 0.3.10. - Tests: 2 new cases covering the explicit-timeout contract and the env override. Existing tests updated to accept the extra kwarg on `FakeSolanaClient.__init__`. 49 pass. Belt-and-suspenders fix: the SDK should also bump its default, tracked separately at BlockRunAI/blockrun-llm. This change unblocks sidecar users immediately, without needing the SDK to release first. 
Refs: BlockRunAI/blockrun-llm#6 --- blockrun_litellm/__init__.py | 2 +- blockrun_litellm/_adapter.py | 12 +++++++ pyproject.toml | 2 +- tests/test_adapter_solana.py | 64 ++++++++++++++++++++++++++++++++++-- 4 files changed, 75 insertions(+), 5 deletions(-) diff --git a/blockrun_litellm/__init__.py b/blockrun_litellm/__init__.py index bc8ca4a..d552b52 100644 --- a/blockrun_litellm/__init__.py +++ b/blockrun_litellm/__init__.py @@ -26,4 +26,4 @@ from blockrun_litellm.provider import BlockRunLLM, register __all__ = ["BlockRunLLM", "register", "enable_local_logging"] -__version__ = "0.3.9" +__version__ = "0.3.10" diff --git a/blockrun_litellm/_adapter.py b/blockrun_litellm/_adapter.py index 38096dd..093a4c4 100644 --- a/blockrun_litellm/_adapter.py +++ b/blockrun_litellm/_adapter.py @@ -81,6 +81,17 @@ def _is_solana_url(api_url: Optional[str]) -> bool: concurrent.futures.ThreadPoolExecutor(max_workers=20) ) +# Per-call timeout for the image SolanaLLMClient. The SDK default +# (``blockrun_llm.solana_client.DEFAULT_TIMEOUT`` = 60s) is too short for +# slow models such as ``openai/gpt-image-2`` (90-180s end-to-end on the +# synchronous Solana gateway path) — under the default, the sidecar throws +# ``httpx.ReadTimeout`` before the gateway can return the generated image, +# even though the gateway has already accepted payment and is producing the +# result. Overridable via env var ``BLOCKRUN_SOLANA_IMAGE_TIMEOUT`` for ops +# tuning without redeploy. See https://github.com/BlockRunAI/blockrun-llm +# for the matching upstream change. 
+_SOLANA_IMAGE_TIMEOUT_S = float(os.environ.get("BLOCKRUN_SOLANA_IMAGE_TIMEOUT", "300")) + def _wallet_env_var(api_url: Optional[str]) -> str: """Which env var to consult for the default wallet on this chain.""" @@ -328,6 +339,7 @@ def get_image_client( client = SolanaLLMClient( private_key=private_key, api_url=api_url or SOLANA_API_URL, + timeout=_SOLANA_IMAGE_TIMEOUT_S, ) else: client = ImageClient(private_key=private_key, api_url=api_url) diff --git a/pyproject.toml b/pyproject.toml index 29ef9ae..e92f6e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "blockrun-litellm" -version = "0.3.9" +version = "0.3.10" description = "LiteLLM adapter for BlockRun — call x402-paid AI models via LiteLLM (custom provider or local OpenAI-compatible proxy)" readme = "README.md" license = "MIT" diff --git a/tests/test_adapter_solana.py b/tests/test_adapter_solana.py index 556127b..873bf7a 100644 --- a/tests/test_adapter_solana.py +++ b/tests/test_adapter_solana.py @@ -75,7 +75,7 @@ def test_solana_client_routes_through_sync_factory(monkeypatch): instances: list[Any] = [] class FakeSolanaClient: - def __init__(self, *, private_key=None, api_url=None): + def __init__(self, *, private_key=None, api_url=None, **kwargs): instances.append({"private_key": private_key, "api_url": api_url}) # Reset the module-level cache so the patched class is used. 
@@ -127,7 +127,7 @@ def test_image_client_routes_to_solana(monkeypatch): instances: list[Any] = [] class FakeSolanaClient: - def __init__(self, *, private_key=None, api_url=None): + def __init__(self, *, private_key=None, api_url=None, **kwargs): instances.append({"private_key": private_key, "api_url": api_url}) monkeypatch.setattr(_adapter, "_image_clients", {}) @@ -174,7 +174,7 @@ def model_dump(self, exclude_none=True): return {"data": [{"url": "https://example/img.png"}]} class FakeSolanaClient: - def __init__(self, *, private_key=None, api_url=None): + def __init__(self, *, private_key=None, api_url=None, **kwargs): pass def image(self, prompt, *, model=None, size=None, n=1): @@ -200,3 +200,61 @@ def image(self, prompt, *, model=None, size=None, n=1): "size": "1024x1024", "n": 1, } + + +def test_solana_image_client_overrides_sdk_default_timeout(monkeypatch): + """get_image_client must pass an explicit timeout to SolanaLLMClient. + + The SDK default (``blockrun_llm.solana_client.DEFAULT_TIMEOUT`` = 60s) + is too short for slow models such as ``openai/gpt-image-2`` (90-180s + end-to-end on the synchronous Solana gateway). Without an override, + the sidecar throws ``httpx.ReadTimeout`` mid-generation and clients + see 500 / payment-rejected even though the gateway already shipped + the image. 
+ """ + captured: dict = {} + + class FakeSolanaClient: + def __init__(self, *, private_key=None, api_url=None, timeout=None): + captured["timeout"] = timeout + + monkeypatch.setattr(_adapter, "_image_clients", {}) + monkeypatch.setattr(_adapter, "SolanaLLMClient", FakeSolanaClient) + monkeypatch.setattr(_adapter, "_HAS_SOLANA", True) + + _adapter.get_image_client( + api_url="https://sol.blockrun.ai/api", + private_key="bogus", + ) + assert captured["timeout"] is not None, "timeout must be passed explicitly" + assert captured["timeout"] >= 180.0, ( + f"timeout {captured['timeout']}s is not safely above slow-image-model " + "generation time (90-180s typical)" + ) + + +def test_solana_image_timeout_env_override(monkeypatch): + """``BLOCKRUN_SOLANA_IMAGE_TIMEOUT`` env var lets ops tune without a redeploy.""" + monkeypatch.setenv("BLOCKRUN_SOLANA_IMAGE_TIMEOUT", "420") + import importlib + from blockrun_litellm import _adapter as adapter_mod + importlib.reload(adapter_mod) + + captured: dict = {} + + class FakeSolanaClient: + def __init__(self, *, private_key=None, api_url=None, timeout=None): + captured["timeout"] = timeout + + monkeypatch.setattr(adapter_mod, "_image_clients", {}) + monkeypatch.setattr(adapter_mod, "SolanaLLMClient", FakeSolanaClient) + monkeypatch.setattr(adapter_mod, "_HAS_SOLANA", True) + + adapter_mod.get_image_client( + api_url="https://sol.blockrun.ai/api", + private_key="bogus", + ) + assert captured["timeout"] == 420.0 + + monkeypatch.delenv("BLOCKRUN_SOLANA_IMAGE_TIMEOUT", raising=False) + importlib.reload(adapter_mod)