From 221895b806f1312b1ba83194b5f6e2541e0b2803 Mon Sep 17 00:00:00 2001
From: Fsocietyhhh <1211904451@qq.com>
Date: Thu, 28 May 2026 23:13:34 -0700
Subject: [PATCH] fix(image): override SolanaLLMClient default timeout for slow
 image models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The SDK default (`blockrun_llm.solana_client.DEFAULT_TIMEOUT = 60.0`) is
too short for slow image models on the synchronous Solana gateway path.
On `openai/gpt-image-2` with non-trivial prompts the upstream takes
90-180s end-to-end; the sidecar throws `httpx.ReadTimeout` mid-call,
returns 500 to clients, and LiteLLM Proxy then retries 3-4 times — each
attempt pays the gateway (which still successfully settles in the
background per the optimistic-settle fix on blockrun-sol) yet the user
sees only failures and burns ~$0.25 per request without an image.

Reproduced end-to-end against sol.blockrun.ai with the user-reported
failure case (gpt-image-2 + complex Chinese prompt, 1024x1024) via the
full LiteLLM Proxy → blockrun-litellm sidecar → blockrun-llm SDK chain.
With the default 60s timeout: 4 retries, 240s wall time, HTTP 500 responses, no image.
With this fix: HTTP 200 in 104s, real image URL returned.

Changes:
  - `get_image_client` passes an explicit `timeout=_SOLANA_IMAGE_TIMEOUT_S`
    (default 300s, overridable via env var
    `BLOCKRUN_SOLANA_IMAGE_TIMEOUT`).
  - Bump version to 0.3.10.
  - Tests: 2 new cases covering the explicit-timeout contract and the env
    override. Existing tests updated to accept the extra kwarg on
    `FakeSolanaClient.__init__`. 49 pass.

Belt-and-suspenders fix: the SDK should also bump its default, tracked
separately at BlockRunAI/blockrun-llm. This change unblocks sidecar users
immediately, without needing the SDK to release first.

Refs: BlockRunAI/blockrun-llm#6
---
 blockrun_litellm/__init__.py |  2 +-
 blockrun_litellm/_adapter.py | 12 +++++++
 pyproject.toml               |  2 +-
 tests/test_adapter_solana.py | 64 ++++++++++++++++++++++++++++++++++--
 4 files changed, 75 insertions(+), 5 deletions(-)

diff --git a/blockrun_litellm/__init__.py b/blockrun_litellm/__init__.py
index bc8ca4a..d552b52 100644
--- a/blockrun_litellm/__init__.py
+++ b/blockrun_litellm/__init__.py
@@ -26,4 +26,4 @@
 from blockrun_litellm.provider import BlockRunLLM, register
 
 __all__ = ["BlockRunLLM", "register", "enable_local_logging"]
-__version__ = "0.3.9"
+__version__ = "0.3.10"
diff --git a/blockrun_litellm/_adapter.py b/blockrun_litellm/_adapter.py
index 38096dd..093a4c4 100644
--- a/blockrun_litellm/_adapter.py
+++ b/blockrun_litellm/_adapter.py
@@ -81,6 +81,47 @@ def _is_solana_url(api_url: Optional[str]) -> bool:
     concurrent.futures.ThreadPoolExecutor(max_workers=20)
 )
 
+
+def _positive_float_env(name: str, default: float) -> float:
+    """Read env var ``name`` as a positive, finite number of seconds.
+
+    Returns ``default`` when the variable is unset or blank. A value that
+    is non-numeric, non-positive, NaN or infinite is ignored with a
+    ``RuntimeWarning`` instead of raising, because this runs at import
+    time and a mistyped tuning knob must not take the whole module down.
+    """
+    import warnings
+
+    raw = os.environ.get(name, "").strip()
+    if not raw:
+        return default
+    try:
+        value = float(raw)
+    except ValueError:
+        value = float("nan")
+    # The chained comparison is False for NaN, so NaN is rejected too.
+    if 0 < value < float("inf"):
+        return value
+    warnings.warn(
+        f"Ignoring invalid {name}={raw!r}; using default {default}s",
+        RuntimeWarning,
+        stacklevel=2,
+    )
+    return default
+
+
+# Per-call timeout for the image SolanaLLMClient. The SDK default
+# (``blockrun_llm.solana_client.DEFAULT_TIMEOUT`` = 60s) is too short for
+# slow models such as ``openai/gpt-image-2`` (90-180s end-to-end on the
+# synchronous Solana gateway path) — under the default, the sidecar throws
+# ``httpx.ReadTimeout`` before the gateway can return the generated image,
+# even though the gateway has already accepted payment and is producing the
+# result. Overridable via env var ``BLOCKRUN_SOLANA_IMAGE_TIMEOUT`` for ops
+# tuning without redeploy (read once at import; invalid values fall back
+# to 300s). See https://github.com/BlockRunAI/blockrun-llm for the matching
+# upstream change.
+_SOLANA_IMAGE_TIMEOUT_S = _positive_float_env("BLOCKRUN_SOLANA_IMAGE_TIMEOUT", 300.0)
+
 
 def _wallet_env_var(api_url: Optional[str]) -> str:
     """Which env var to consult for the default wallet on this chain."""
@@ -328,6 +369,7 @@ def get_image_client(
                 client = SolanaLLMClient(
                     private_key=private_key,
                     api_url=api_url or SOLANA_API_URL,
+                    timeout=_SOLANA_IMAGE_TIMEOUT_S,
                 )
             else:
                 client = ImageClient(private_key=private_key, api_url=api_url)
diff --git a/pyproject.toml b/pyproject.toml
index 29ef9ae..e92f6e3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "blockrun-litellm"
-version = "0.3.9"
+version = "0.3.10"
 description = "LiteLLM adapter for BlockRun — call x402-paid AI models via LiteLLM (custom provider or local OpenAI-compatible proxy)"
 readme = "README.md"
 license = "MIT"
diff --git a/tests/test_adapter_solana.py b/tests/test_adapter_solana.py
index 556127b..873bf7a 100644
--- a/tests/test_adapter_solana.py
+++ b/tests/test_adapter_solana.py
@@ -75,7 +75,7 @@ def test_solana_client_routes_through_sync_factory(monkeypatch):
     instances: list[Any] = []
 
     class FakeSolanaClient:
-        def __init__(self, *, private_key=None, api_url=None):
+        def __init__(self, *, private_key=None, api_url=None, **kwargs):
             instances.append({"private_key": private_key, "api_url": api_url})
 
     # Reset the module-level cache so the patched class is used.
@@ -127,7 +127,7 @@ def test_image_client_routes_to_solana(monkeypatch):
     instances: list[Any] = []
 
     class FakeSolanaClient:
-        def __init__(self, *, private_key=None, api_url=None):
+        def __init__(self, *, private_key=None, api_url=None, **kwargs):
             instances.append({"private_key": private_key, "api_url": api_url})
 
     monkeypatch.setattr(_adapter, "_image_clients", {})
@@ -174,7 +174,7 @@ def model_dump(self, exclude_none=True):
             return {"data": [{"url": "https://example/img.png"}]}
 
     class FakeSolanaClient:
-        def __init__(self, *, private_key=None, api_url=None):
+        def __init__(self, *, private_key=None, api_url=None, **kwargs):
             pass
 
         def image(self, prompt, *, model=None, size=None, n=1):
@@ -200,3 +200,61 @@ def image(self, prompt, *, model=None, size=None, n=1):
         "size": "1024x1024",
         "n": 1,
     }
+
+
+def test_solana_image_client_overrides_sdk_default_timeout(monkeypatch):
+    """get_image_client must pass an explicit timeout to SolanaLLMClient.
+
+    The SDK default (``blockrun_llm.solana_client.DEFAULT_TIMEOUT`` = 60s)
+    is too short for slow models such as ``openai/gpt-image-2`` (90-180s
+    end-to-end on the synchronous Solana gateway). Without an override,
+    the sidecar throws ``httpx.ReadTimeout`` mid-generation and clients
+    see 500 / payment-rejected even though the gateway has already
+    accepted payment and is still producing the image.
+    """
+    captured: dict = {}
+
+    class FakeSolanaClient:
+        def __init__(self, *, private_key=None, api_url=None, timeout=None):
+            captured["timeout"] = timeout
+
+    monkeypatch.setattr(_adapter, "_image_clients", {})
+    monkeypatch.setattr(_adapter, "SolanaLLMClient", FakeSolanaClient)
+    monkeypatch.setattr(_adapter, "_HAS_SOLANA", True)
+
+    _adapter.get_image_client(
+        api_url="https://sol.blockrun.ai/api",
+        private_key="bogus",
+    )
+    assert captured["timeout"] is not None, "timeout must be passed explicitly"
+    assert captured["timeout"] >= 180.0, (
+        f"timeout {captured['timeout']}s is not safely above slow-image-model "
+        "generation time (90-180s typical)"
+    )
+
+
+def test_solana_image_timeout_env_override(monkeypatch):
+    """``BLOCKRUN_SOLANA_IMAGE_TIMEOUT`` env var lets ops tune without a redeploy."""
+    import importlib
+    from blockrun_litellm import _adapter as adapter_mod
+
+    captured: dict = {}
+    url = "https://sol.blockrun.ai/api"
+
+    class FakeSolanaClient:
+        def __init__(self, *, private_key=None, api_url=None, timeout=None):
+            captured["timeout"] = timeout
+
+    try:
+        # Scoped patches are undone on exit, i.e. before the reload in ``finally``.
+        with monkeypatch.context() as scoped:
+            scoped.setenv("BLOCKRUN_SOLANA_IMAGE_TIMEOUT", "420")
+            importlib.reload(adapter_mod)  # re-evaluate the env-derived constant
+            scoped.setattr(adapter_mod, "_image_clients", {})
+            scoped.setattr(adapter_mod, "SolanaLLMClient", FakeSolanaClient)
+            scoped.setattr(adapter_mod, "_HAS_SOLANA", True)
+            adapter_mod.get_image_client(api_url=url, private_key="bogus")
+    finally:
+        # Runs even on failure so the 420s constant never leaks into other tests.
+        importlib.reload(adapter_mod)
+    assert captured["timeout"] == 420.0