From 276d5ef23b95b83af4fdecc800e9182ee952740d Mon Sep 17 00:00:00 2001
From: Jacob Fu <141651335+FuJacob@users.noreply.github.com>
Date: Thu, 28 May 2026 01:36:15 -0700
Subject: [PATCH] Bump llama.cpp to b9310 to fix Metal deadlock on macOS 26 (Cotabby #262)

The bundled llama.framework at b8665 deadlocks at model load on Apple
Silicon under macOS 26 (Tahoe): the main thread blocks on a pthread
mutex while ggml-metal's residency-set keep-alive thread spins in
__semwait_signal. Reproduces deterministically with Qwen3.5-2B-Q4_K_M
on an M4 / macOS 26.4.1.

The underlying fix landed upstream around b8882
(ggml-org/llama.cpp#20141): AGX_RELAX_CDM_CTXSTORE_TIMEOUT is set
unconditionally inside ggml-metal, preventing the IOGPU command-buffer
timeout that left the loader stuck. b8665 predates that fix. b9310 was
validated working on the reporter's hardware.

swift build + swift test pass against b9310; the C++ wrapper compiles
without changes, so CotabbyInferenceEngine.h ABI is unchanged and
downstream Cotabby picks this up with no further edits.
---
 Package.swift | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Package.swift b/Package.swift
index 6d0ebcb..1827dbc 100644
--- a/Package.swift
+++ b/Package.swift
@@ -14,8 +14,8 @@ let package = Package(
     targets: [
         .binaryTarget(
             name: "llama-cpp",
-            url: "https://github.com/ggml-org/llama.cpp/releases/download/b8665/llama-b8665-xcframework.zip",
-            checksum: "5279c975a0ad136eb0ca29bb6390735b949bc0bed0f803124538e341315cb8f7"
+            url: "https://github.com/ggml-org/llama.cpp/releases/download/b9310/llama-b9310-xcframework.zip",
+            checksum: "e2411e2e1a875d38d7e1cd478ea5ba2db1b70817bcd36c624f2e952fd017eb83"
         ),
         .target(
             name: "CotabbyInferenceEngine",