From 7670a1fc8a4e265fbda4335bd66784f259444f88 Mon Sep 17 00:00:00 2001 From: RioPlay Date: Sat, 2 May 2026 02:07:44 -0500 Subject: [PATCH 1/9] chore(deepinfra): add 34 new models and update pricing on 6 existing - Update pricing on GLM-5, MiniMax-M2.5, Kimi-K2.5, Qwen3.5-35B-A3B, Qwen3.6-35B-A3B, Qwen3-Coder-480B-A35B-Instruct-Turbo to match deepinfra.com current rates - Add 5 nvidia models (Nemotron 3 series) - Add 7 deepseek-ai models (V4-Flash, V3 variants, R1 variants) - Add 5 google models (gemma 4 and gemma 3 series) - Add 13 Qwen models (Qwen3.5, Qwen3.6, Qwen3, Qwen3-VL, Qwen2.5) - Add Step-3.5-Flash, 2 meta-llama, 1 phi-4, 3 mistralai - Add gpt-oss-120b-Turbo, 2 NousResearch, Gryphe, 2 Sao10K --- .../models/Gryphe/MythoMax-L2-13b.toml | 21 +++++++++++++++ .../models/MiniMaxAI/MiniMax-M2.5.toml | 6 ++--- .../NousResearch/Hermes-3-Llama-3.1-405B.toml | 23 ++++++++++++++++ .../NousResearch/Hermes-3-Llama-3.1-70B.toml | 23 ++++++++++++++++ .../models/Qwen/Qwen2.5-72B-Instruct.toml | 23 ++++++++++++++++ .../deepinfra/models/Qwen/Qwen3-14B.toml | 23 ++++++++++++++++ .../Qwen/Qwen3-235B-A22B-Instruct-2507.toml | 23 ++++++++++++++++ .../Qwen/Qwen3-235B-A22B-Thinking-2507.toml | 24 +++++++++++++++++ .../deepinfra/models/Qwen/Qwen3-30B-A3B.toml | 23 ++++++++++++++++ .../deepinfra/models/Qwen/Qwen3-32B.toml | 23 ++++++++++++++++ .../Qwen3-Coder-480B-A35B-Instruct-Turbo.toml | 5 ++-- .../Qwen/Qwen3-Next-80B-A3B-Instruct.toml | 23 ++++++++++++++++ .../Qwen/Qwen3-VL-235B-A22B-Instruct.toml | 24 +++++++++++++++++ .../Qwen/Qwen3-VL-30B-A3B-Instruct.toml | 23 ++++++++++++++++ .../deepinfra/models/Qwen/Qwen3.5-0.8B.toml | 22 ++++++++++++++++ .../models/Qwen/Qwen3.5-122B-A10B.toml | 23 ++++++++++++++++ .../deepinfra/models/Qwen/Qwen3.5-27B.toml | 23 ++++++++++++++++ .../deepinfra/models/Qwen/Qwen3.5-2B.toml | 22 ++++++++++++++++ .../models/Qwen/Qwen3.5-35B-A3B.toml | 5 ++-- .../deepinfra/models/Qwen/Qwen3.5-4B.toml | 22 ++++++++++++++++ 
.../deepinfra/models/Qwen/Qwen3.5-9B.toml | 22 ++++++++++++++++ .../deepinfra/models/Qwen/Qwen3.6-27B.toml | 23 ++++++++++++++++ .../models/Qwen/Qwen3.6-35B-A3B.toml | 4 +-- .../models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml | 22 ++++++++++++++++ .../models/Sao10K/L3.1-70B-Euryale-v2.2.toml | 22 ++++++++++++++++ .../deepseek-ai/DeepSeek-R1-0528-Turbo.toml | 22 ++++++++++++++++ .../DeepSeek-R1-Distill-Llama-70B.toml | 22 ++++++++++++++++ .../models/deepseek-ai/DeepSeek-V3-0324.toml | 24 +++++++++++++++++ .../deepseek-ai/DeepSeek-V3.1-Terminus.toml | 24 +++++++++++++++++ .../models/deepseek-ai/DeepSeek-V3.1.toml | 24 +++++++++++++++++ .../models/deepseek-ai/DeepSeek-V3.toml | 23 ++++++++++++++++ .../models/deepseek-ai/DeepSeek-V4-Flash.toml | 23 ++++++++++++++++ .../models/google/gemma-3-12b-it.toml | 23 ++++++++++++++++ .../models/google/gemma-3-27b-it.toml | 23 ++++++++++++++++ .../models/google/gemma-3-4b-it.toml | 23 ++++++++++++++++ .../models/google/gemma-4-26B-A4B-it.toml | 23 ++++++++++++++++ .../models/google/gemma-4-31B-it.toml | 23 ++++++++++++++++ .../Llama-3.2-11B-Vision-Instruct.toml | 22 ++++++++++++++++ .../models/meta-llama/Llama-Guard-4-12B.toml | 22 ++++++++++++++++ .../deepinfra/models/microsoft/phi-4.toml | 23 ++++++++++++++++ .../mistralai/Mistral-Nemo-Instruct-2407.toml | 23 ++++++++++++++++ .../Mistral-Small-24B-Instruct-2501.toml | 23 ++++++++++++++++ .../Mistral-Small-3.2-24B-Instruct-2506.toml | 23 ++++++++++++++++ .../models/moonshotai/Kimi-K2.5.toml | 6 ++--- .../Llama-3.3-Nemotron-Super-49B-v1.5.toml | 22 ++++++++++++++++ .../NVIDIA-Nemotron-3-Super-120B-A12B.toml | 22 ++++++++++++++++ .../nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml | 21 +++++++++++++++ .../nvidia/Nemotron-3-Nano-30B-A3B.toml | 22 ++++++++++++++++ ...emotron-3-Nano-Omni-30B-A3B-Reasoning.toml | 24 +++++++++++++++++ .../models/openai/gpt-oss-120b-Turbo.toml | 22 ++++++++++++++++ .../models/stepfun-ai/Step-3.5-Flash.toml | 26 +++++++++++++++++++ 
.../deepinfra/models/zai-org/GLM-4.7.toml | 2 +- providers/deepinfra/models/zai-org/GLM-5.toml | 6 ++--- 53 files changed, 1065 insertions(+), 18 deletions(-) create mode 100644 providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml create mode 100644 providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml create mode 100644 providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-14B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-32B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-27B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-2B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-4B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-9B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.6-27B.toml create mode 100644 providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml create mode 100644 providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml create mode 100644 
providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml create mode 100644 providers/deepinfra/models/google/gemma-3-12b-it.toml create mode 100644 providers/deepinfra/models/google/gemma-3-27b-it.toml create mode 100644 providers/deepinfra/models/google/gemma-3-4b-it.toml create mode 100644 providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml create mode 100644 providers/deepinfra/models/google/gemma-4-31B-it.toml create mode 100644 providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml create mode 100644 providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml create mode 100644 providers/deepinfra/models/microsoft/phi-4.toml create mode 100644 providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml create mode 100644 providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml create mode 100644 providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml create mode 100644 providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml create mode 100644 providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml create mode 100644 providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml create mode 100644 providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml create mode 100644 providers/deepinfra/models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml create mode 100644 providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml create mode 100644 providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml diff --git a/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml b/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml new 
file mode 100644 index 000000000..f66b57a4c --- /dev/null +++ b/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml @@ -0,0 +1,21 @@ +# https://deepinfra.com/Gryphe/MythoMax-L2-13b +name = "MythoMax L2 13B" +family = "llama" +release_date = "2023-11-01" +last_updated = "2023-11-01" +attachment = false +reasoning = false +temperature = true +open_weights = true + +[cost] +input = 0.40 +output = 0.40 + +[limit] +context = 4096 +output = 2048 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml index bbbdbe470..0a3d9dcf8 100644 --- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml +++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml @@ -11,10 +11,8 @@ knowledge = "2025-06" open_weights = true [cost] -input = 0.27 -output = 0.95 -cache_read = 0.03 -cache_write = 0.375 +input = 0.15 +output = 1.15 [limit] context = 204_800 diff --git a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml new file mode 100644 index 000000000..dccf1e193 --- /dev/null +++ b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/NousResearch/Hermes-3-Llama-3.1-405B +name = "Hermes 3 Llama 3.1 405B" +family = "hermes" +release_date = "2024-10-01" +last_updated = "2024-10-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-06" +open_weights = true + +[cost] +input = 1.00 +output = 1.00 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml new file mode 100644 index 000000000..3bb7f2a06 --- /dev/null +++ 
b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/NousResearch/Hermes-3-Llama-3.1-70B +name = "Hermes 3 Llama 3.1 70B" +family = "hermes" +release_date = "2024-10-01" +last_updated = "2024-10-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-06" +open_weights = true + +[cost] +input = 0.30 +output = 0.30 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml new file mode 100644 index 000000000..e459c46a1 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen2.5-72B-Instruct +name = "Qwen2.5 72B Instruct" +family = "qwen" +release_date = "2024-12-01" +last_updated = "2024-12-01" +attachment = false +reasoning = false +temperature = true +tool_call = true +knowledge = "2024-06" +open_weights = true + +[cost] +input = 0.36 +output = 0.40 + +[limit] +context = 32_768 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3-14B.toml b/providers/deepinfra/models/Qwen/Qwen3-14B.toml new file mode 100644 index 000000000..021b0fb44 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-14B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-14B +name = "Qwen3 14B" +family = "qwen" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.12 +output = 0.24 + +[limit] +context = 40_960 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git 
a/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml new file mode 100644 index 000000000..63d303946 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-235B-A22B-Instruct-2507 +name = "Qwen3 235B A22B Instruct 2507" +family = "qwen" +release_date = "2025-07-01" +last_updated = "2025-07-01" +attachment = true +reasoning = false +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.071 +output = 0.10 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml new file mode 100644 index 000000000..ce52431dc --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/Qwen/Qwen3-235B-A22B-Thinking-2507 +name = "Qwen3 235B A22B Thinking 2507" +family = "qwen" +release_date = "2025-07-01" +last_updated = "2025-07-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.23 +output = 2.30 +cache_read = 0.20 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml new file mode 100644 index 000000000..60209c9fd --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-30B-A3B +name = "Qwen3 30B A3B" +family = "qwen" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = false +reasoning = 
true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.08 +output = 0.28 + +[limit] +context = 40_960 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3-32B.toml b/providers/deepinfra/models/Qwen/Qwen3-32B.toml new file mode 100644 index 000000000..8fb7ef139 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-32B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-32B +name = "Qwen3 32B" +family = "qwen" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.08 +output = 0.28 + +[limit] +context = 40_960 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml index 92f8cb540..1154bb779 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml @@ -10,8 +10,9 @@ tool_call = true open_weights = true [cost] -input = 0.3 -output = 1.2 +input = 0.30 +output = 1.00 +cache_read = 0.10 [limit] context = 262_144 diff --git a/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml new file mode 100644 index 000000000..abcf7d075 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-Next-80B-A3B-Instruct +name = "Qwen3 Next 80B A3B Instruct" +family = "qwen" +release_date = "2026-03-01" +last_updated = "2026-03-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-01" 
+open_weights = true + +[cost] +input = 0.09 +output = 1.10 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml new file mode 100644 index 000000000..67bed3f00 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/Qwen/Qwen3-VL-235B-A22B-Instruct +name = "Qwen3 VL 235B A22B Instruct" +family = "qwen" +release_date = "2025-05-01" +last_updated = "2025-05-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.20 +output = 0.88 +cache_read = 0.11 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml new file mode 100644 index 000000000..0954ffa6a --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-VL-30B-A3B-Instruct +name = "Qwen3 VL 30B A3B Instruct" +family = "qwen" +release_date = "2025-05-01" +last_updated = "2025-05-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.15 +output = 0.60 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml new file mode 100644 index 000000000..4940cd7b6 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml @@ -0,0 +1,22 @@ +# 
https://deepinfra.com/Qwen/Qwen3.5-0.8B +name = "Qwen3.5 0.8B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.01 +output = 0.05 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml new file mode 100644 index 000000000..4f3020eb7 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3.5-122B-A10B +name = "Qwen3.5 122B A10B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.29 +output = 2.40 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml new file mode 100644 index 000000000..23b94b3de --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3.5-27B +name = "Qwen3.5 27B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.26 +output = 2.60 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml new file mode 100644 index 000000000..01cbc6d9a --- /dev/null +++ 
b/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/Qwen/Qwen3.5-2B +name = "Qwen3.5 2B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.02 +output = 0.10 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml index 10dbaf490..e342e5b18 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml @@ -10,9 +10,8 @@ tool_call = true open_weights = true [cost] -input = 0.2 -output = 0.95 -cached = 0.1 +input = 0.18 +output = 1.00 [limit] context = 262_144 diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml new file mode 100644 index 000000000..7ed305694 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/Qwen/Qwen3.5-4B +name = "Qwen3.5 4B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.03 +output = 0.15 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml new file mode 100644 index 000000000..2320ecf02 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/Qwen/Qwen3.5-9B +name = "Qwen3.5 9B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = false +reasoning = true +temperature = true 
+tool_call = true +open_weights = true + +[cost] +input = 0.04 +output = 0.15 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3.6-27B.toml b/providers/deepinfra/models/Qwen/Qwen3.6-27B.toml new file mode 100644 index 000000000..dab29ef0a --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.6-27B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3.6-27B +name = "Qwen3.6 27B" +family = "qwen3.6" +release_date = "2026-04-01" +last_updated = "2026-04-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.32 +output = 3.20 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml index 5a42cc4e8..5c1c94a60 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml @@ -9,8 +9,8 @@ tool_call = true open_weights = true [cost] -input = 0.20 -output = 1.00 +input = 0.15 +output = 0.95 [limit] context = 262_144 diff --git a/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml b/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml new file mode 100644 index 000000000..266f6ea62 --- /dev/null +++ b/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/Sao10K/L3-8B-Lunaris-v1-Turbo +name = "L3 8B Lunaris v1 Turbo" +family = "llama" +release_date = "2024-09-01" +last_updated = "2024-09-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.04 +output = 0.05 + +[limit] +context = 8192 +output = 2048 + +[modalities] +input = ["text"] +output = ["text"] \ 
No newline at end of file diff --git a/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml b/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml new file mode 100644 index 000000000..cab06dcc8 --- /dev/null +++ b/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/Sao10K/L3.1-70B-Euryale-v2.2 +name = "L3.1 70B Euryale v2.2" +family = "llama" +release_date = "2024-11-01" +last_updated = "2024-11-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.85 +output = 0.85 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml new file mode 100644 index 000000000..cb2d1cae7 --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-R1-0528-Turbo +name = "DeepSeek R1 0528 Turbo" +family = "deepseek-thinking" +release_date = "2025-05-28" +last_updated = "2025-05-28" +attachment = false +reasoning = true +temperature = true +knowledge = "2024-07" +open_weights = false + +[cost] +input = 1.00 +output = 3.00 + +[limit] +context = 32_768 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml new file mode 100644 index 000000000..32b40c39a --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-R1-Distill-Llama-70B +name = "DeepSeek R1 Distill Llama 70B" +family = "deepseek-thinking" +release_date = "2025-02-01" +last_updated = 
"2025-02-01" +attachment = false +reasoning = true +temperature = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.70 +output = 0.80 + +[limit] +context = 131_072 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml new file mode 100644 index 000000000..a290691b9 --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V3-0324 +name = "DeepSeek V3 0324" +family = "deepseek" +release_date = "2025-03-24" +last_updated = "2025-03-24" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-12" +open_weights = false + +[cost] +input = 0.20 +output = 0.77 +cache_read = 0.135 + +[limit] +context = 163_840 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml new file mode 100644 index 000000000..3f25ea8e7 --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V3.1-Terminus +name = "DeepSeek V3.1 Terminus" +family = "deepseek" +release_date = "2026-01-01" +last_updated = "2026-01-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-12" +open_weights = false + +[cost] +input = 0.21 +output = 0.79 +cache_read = 0.13 + +[limit] +context = 163_840 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml new file mode 100644 index 000000000..81118a4ff --- /dev/null 
+++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V3.1 +name = "DeepSeek V3.1" +family = "deepseek" +release_date = "2025-12-01" +last_updated = "2025-12-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-12" +open_weights = false + +[cost] +input = 0.21 +output = 0.79 +cache_read = 0.13 + +[limit] +context = 163_840 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml new file mode 100644 index 000000000..cad03c604 --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V3 +name = "DeepSeek V3" +family = "deepseek" +release_date = "2025-11-01" +last_updated = "2025-11-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-12" +open_weights = false + +[cost] +input = 0.32 +output = 0.89 + +[limit] +context = 163_840 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml new file mode 100644 index 000000000..4632e23cf --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V4-Flash +name = "DeepSeek V4 Flash" +family = "deepseek-flash" +release_date = "2026-04-01" +last_updated = "2026-04-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.14 +output = 0.28 +cache_read = 0.028 + +[limit] +context = 1_048_576 +output = 131_072 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git 
a/providers/deepinfra/models/google/gemma-3-12b-it.toml b/providers/deepinfra/models/google/gemma-3-12b-it.toml new file mode 100644 index 000000000..d30ec29f7 --- /dev/null +++ b/providers/deepinfra/models/google/gemma-3-12b-it.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/google/gemma-3-12b-it +name = "Gemma 3 12B IT" +family = "gemma" +release_date = "2025-02-01" +last_updated = "2025-02-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.04 +output = 0.13 + +[limit] +context = 131_072 +output = 8192 + +[modalities] +input = ["text", "image"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/google/gemma-3-27b-it.toml b/providers/deepinfra/models/google/gemma-3-27b-it.toml new file mode 100644 index 000000000..4c4b9d3d0 --- /dev/null +++ b/providers/deepinfra/models/google/gemma-3-27b-it.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/google/gemma-3-27b-it +name = "Gemma 3 27B IT" +family = "gemma" +release_date = "2025-02-01" +last_updated = "2025-02-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.08 +output = 0.16 + +[limit] +context = 131_072 +output = 8192 + +[modalities] +input = ["text", "image"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/google/gemma-3-4b-it.toml b/providers/deepinfra/models/google/gemma-3-4b-it.toml new file mode 100644 index 000000000..6c7b160bf --- /dev/null +++ b/providers/deepinfra/models/google/gemma-3-4b-it.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/google/gemma-3-4b-it +name = "Gemma 3 4B IT" +family = "gemma" +release_date = "2025-02-01" +last_updated = "2025-02-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.04 +output = 0.08 + +[limit] +context = 131_072 +output = 
8192 + +[modalities] +input = ["text", "image"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml b/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml new file mode 100644 index 000000000..a96231d05 --- /dev/null +++ b/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/google/gemma-4-26B-A4B-it +name = "Gemma 4 26B A4B IT" +family = "gemma" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.07 +output = 0.34 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/google/gemma-4-31B-it.toml b/providers/deepinfra/models/google/gemma-4-31B-it.toml new file mode 100644 index 000000000..71180fef5 --- /dev/null +++ b/providers/deepinfra/models/google/gemma-4-31B-it.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/google/gemma-4-31B-it +name = "Gemma 4 31B IT" +family = "gemma" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.13 +output = 0.38 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml new file mode 100644 index 000000000..14039f65c --- /dev/null +++ b/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/meta-llama/Llama-3.2-11B-Vision-Instruct +name = "Llama 3.2 11B Vision Instruct" +family = "llama" +release_date 
= "2024-10-01" +last_updated = "2024-10-01" +attachment = true +reasoning = false +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.245 +output = 0.245 + +[limit] +context = 131_072 +output = 4096 + +[modalities] +input = ["text", "image"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml b/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml new file mode 100644 index 000000000..c2ff559fb --- /dev/null +++ b/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/meta-llama/Llama-Guard-4-12B +name = "Llama Guard 4 12B" +family = "llama" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = false +reasoning = false +temperature = false +tool_call = false +open_weights = true + +[cost] +input = 0.18 +output = 0.18 + +[limit] +context = 163_840 +output = 4096 + +[modalities] +input = ["text", "image"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/microsoft/phi-4.toml b/providers/deepinfra/models/microsoft/phi-4.toml new file mode 100644 index 000000000..4413a7167 --- /dev/null +++ b/providers/deepinfra/models/microsoft/phi-4.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/microsoft/phi-4 +name = "Phi 4" +family = "phi" +release_date = "2024-12-01" +last_updated = "2024-12-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-06" +open_weights = true + +[cost] +input = 0.07 +output = 0.14 + +[limit] +context = 16_384 +output = 4096 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml b/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml new file mode 100644 index 000000000..55c02dcdd --- /dev/null +++ b/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml @@ -0,0 +1,23 @@
+# https://deepinfra.com/mistralai/Mistral-Nemo-Instruct-2407 +name = "Mistral Nemo Instruct 2407" +family = "mistral-nemo" +release_date = "2024-07-01" +last_updated = "2024-07-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-06" +open_weights = true + +[cost] +input = 0.02 +output = 0.04 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml b/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml new file mode 100644 index 000000000..141e53388 --- /dev/null +++ b/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/mistralai/Mistral-Small-24B-Instruct-2501 +name = "Mistral Small 24B Instruct 2501" +family = "mistral-small" +release_date = "2025-01-01" +last_updated = "2025-01-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.05 +output = 0.08 + +[limit] +context = 32_768 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml b/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml new file mode 100644 index 000000000..d8c11caf1 --- /dev/null +++ b/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/mistralai/Mistral-Small-3.2-24B-Instruct-2506 +name = "Mistral Small 3.2 24B Instruct 2506" +family = "mistral-small" +release_date = "2025-06-01" +last_updated = "2025-06-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.075 +output = 0.20 + +[limit] +context = 131_072 
+output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml b/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml index 84183d853..c1d110370 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml @@ -11,9 +11,9 @@ knowledge = "2025-01" open_weights = true [cost] -input = 0.50 -output = 2.80 -cached_input = 0.09 +input = 0.45 +output = 2.25 +cache_read = 0.07 [limit] context = 262_144 diff --git a/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml b/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml new file mode 100644 index 000000000..52f52fc2a --- /dev/null +++ b/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5 +name = "Llama 3.3 Nemotron Super 49B v1.5" +family = "nemotron" +release_date = "2025-08-01" +last_updated = "2025-08-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.10 +output = 0.40 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml new file mode 100644 index 000000000..1b9ee873b --- /dev/null +++ b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B +name = "Nemotron 3 Super 120B A12B" +family = "nemotron" +release_date = "2025-10-01" +last_updated = "2025-10-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.10 +output = 0.50 + +[limit] +context = 
262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml new file mode 100644 index 000000000..a74190003 --- /dev/null +++ b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml @@ -0,0 +1,21 @@ +# https://deepinfra.com/nvidia/NVIDIA-Nemotron-Nano-9B-v2 +name = "Nemotron Nano 9B v2" +family = "nemotron" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = false +reasoning = true +temperature = true +open_weights = true + +[cost] +input = 0.04 +output = 0.16 + +[limit] +context = 131_072 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml new file mode 100644 index 000000000..539125159 --- /dev/null +++ b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/nvidia/Nemotron-3-Nano-30B-A3B +name = "Nemotron 3 Nano 30B A3B" +family = "nemotron" +release_date = "2026-03-01" +last_updated = "2026-03-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.05 +output = 0.20 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml new file mode 100644 index 000000000..5224a123b --- /dev/null +++ b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning +name = "Nemotron 3 Nano Omni 30B A3B Reasoning" +family 
= "nemotron" +release_date = "2026-04-01" +last_updated = "2026-04-01" +attachment = true +reasoning = true +structured_output = true +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.20 +output = 0.80 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video", "audio"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml b/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml new file mode 100644 index 000000000..095afb11d --- /dev/null +++ b/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/openai/gpt-oss-120b-Turbo +name = "GPT OSS 120B Turbo" +family = "gpt-oss" +release_date = "2025-09-01" +last_updated = "2025-09-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.15 +output = 0.60 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml b/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml new file mode 100644 index 000000000..80e947033 --- /dev/null +++ b/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml @@ -0,0 +1,26 @@ +# https://deepinfra.com/stepfun-ai/Step-3.5-Flash +name = "Step 3.5 Flash" +family = "step" +release_date = "2025-12-01" +last_updated = "2025-12-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[interleaved] +field = "reasoning_content" + +[cost] +input = 0.10 +output = 0.30 +cache_read = 0.02 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/zai-org/GLM-4.7.toml b/providers/deepinfra/models/zai-org/GLM-4.7.toml index 76b66a0c5..47dc4f9af 
100644 --- a/providers/deepinfra/models/zai-org/GLM-4.7.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.7.toml @@ -19,7 +19,7 @@ output = 1.75 cache_read = 0.08 [limit] -context = 202_752 +context = 202_752 # https://deepinfra.com/docs/advanced/max_tokens_limit output = 16_384 diff --git a/providers/deepinfra/models/zai-org/GLM-5.toml b/providers/deepinfra/models/zai-org/GLM-5.toml index 257b2c673..de1007aa8 100644 --- a/providers/deepinfra/models/zai-org/GLM-5.toml +++ b/providers/deepinfra/models/zai-org/GLM-5.toml @@ -14,9 +14,9 @@ open_weights = true field = "reasoning_content" [cost] -input = 0.8 -output = 2.56 -cache_read = 0.16 +input = 0.60 +output = 2.08 +cache_read = 0.12 [limit] context = 202_752 From 2070df62c2ef55f2e3a556798f4e2dc912cb7779 Mon Sep 17 00:00:00 2001 From: RioPlay Date: Sat, 2 May 2026 02:07:50 -0500 Subject: [PATCH 2/9] fix(deepinfra): correct cache pricing field names (cache_read) across all providers --- providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml | 2 +- providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml | 2 +- providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml | 2 +- providers/vercel/models/deepseek/deepseek-v3.2-exp.toml | 2 +- providers/vercel/models/zai/glm-4.6.toml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml index a0b0ac5d7..21463f3e6 100644 --- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml +++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml @@ -11,7 +11,7 @@ knowledge = "2025-06" [cost] input = 0.28 output = 1.20 -cached_read = 0.14 +cache_read = 0.14 [limit] context = 196_608 diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml index e726226c7..4a5cca112 100644 --- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml 
+++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml @@ -15,7 +15,7 @@ field = "reasoning_content" [cost] input = 0.254 output = 1.02 -cached_input = 0.127 +cache_read = 0.127 [limit] context = 262_144 diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml index 05a7a0686..e961adf51 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml @@ -12,7 +12,7 @@ open_weights = true [cost] input = 0.54 output = 3.4 -cached = 0.27 +cache_read = 0.27 [limit] context = 262_144 diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml b/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml index b455e5568..4a3436a72 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml @@ -15,7 +15,7 @@ field = "reasoning_content" [cost] input = 0.47 output = 2.00 -cached_input = 0.141 +cache_read = 0.141 [limit] context = 131_072 diff --git a/providers/vercel/models/deepseek/deepseek-v3.2-exp.toml b/providers/vercel/models/deepseek/deepseek-v3.2-exp.toml index cccb3ccf8..9527b74b0 100644 --- a/providers/vercel/models/deepseek/deepseek-v3.2-exp.toml +++ b/providers/vercel/models/deepseek/deepseek-v3.2-exp.toml @@ -12,7 +12,7 @@ open_weights = false [cost] input = 0.27 output = 0.4 -cached_input = 0.27 +cache_read = 0.27 [limit] context = 163_840 diff --git a/providers/vercel/models/zai/glm-4.6.toml b/providers/vercel/models/zai/glm-4.6.toml index 82a689774..d43143488 100644 --- a/providers/vercel/models/zai/glm-4.6.toml +++ b/providers/vercel/models/zai/glm-4.6.toml @@ -14,7 +14,7 @@ interleaved = true [cost] input = 0.45 output = 1.8 -cached_input = 0.45 +cache_read = 0.45 [limit] context = 200_000 From 5ce16fb0fd7ac2d9225ff1d72b8bd42c88f27aad Mon Sep 17 00:00:00 2001 From: RioPlay Date: Sat, 2 May 2026 02:07:57 -0500 Subject: [PATCH 3/9] 
style(deepinfra): add trailing newlines, deepinfra.com comments, fix trailing whitespace --- providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml | 2 +- providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml | 3 ++- providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml | 1 + providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml | 2 ++ .../deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml | 2 +- .../deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml | 2 +- providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3-14B.toml | 2 +- .../deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml | 2 +- .../deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3-32B.toml | 2 +- .../models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml | 3 ++- .../deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml | 3 ++- .../deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml | 2 +- .../deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3.5-27B.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3.5-2B.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml | 2 ++ providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml | 2 ++ providers/deepinfra/models/Qwen/Qwen3.5-4B.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3.5-9B.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3.6-27B.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml | 3 +++ providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml | 2 +- providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml | 2 +- .../deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml | 2 ++ 
providers/deepinfra/models/anthropic/claude-4-opus.toml | 2 ++ .../deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml | 2 +- providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml | 2 ++ .../models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml | 2 +- providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml | 2 +- .../deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml | 2 +- providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml | 2 +- providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml | 2 ++ providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml | 2 +- providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml | 2 +- providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml | 2 ++ providers/deepinfra/models/google/gemma-3-12b-it.toml | 2 +- providers/deepinfra/models/google/gemma-3-27b-it.toml | 2 +- providers/deepinfra/models/google/gemma-3-4b-it.toml | 2 +- providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml | 2 +- providers/deepinfra/models/google/gemma-4-31B-it.toml | 2 +- .../models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml | 2 ++ .../deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml | 2 ++ .../models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml | 2 ++ .../deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml | 2 ++ .../models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml | 2 +- .../models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml | 2 ++ .../meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml | 2 ++ .../models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml | 2 ++ providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml | 2 +- providers/deepinfra/models/microsoft/phi-4.toml | 2 +- .../deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml | 2 +- .../models/mistralai/Mistral-Small-24B-Instruct-2501.toml | 2 +- .../models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml | 2 +- .../deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml | 2 +- 
providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml | 2 ++ providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml | 2 ++ providers/deepinfra/models/moonshotai/Kimi-K2.5.toml | 3 ++- providers/deepinfra/models/moonshotai/Kimi-K2.6.toml | 2 ++ .../models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml | 2 +- .../models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml | 2 +- .../deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml | 2 +- providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml | 2 +- .../models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml | 2 +- providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml | 2 +- providers/deepinfra/models/openai/gpt-oss-120b.toml | 1 + providers/deepinfra/models/openai/gpt-oss-20b.toml | 1 + providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml | 2 +- providers/deepinfra/models/zai-org/GLM-4.5.toml | 2 ++ providers/deepinfra/models/zai-org/GLM-4.6.toml | 2 +- providers/deepinfra/models/zai-org/GLM-4.6V.toml | 2 +- providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml | 1 + providers/deepinfra/models/zai-org/GLM-4.7.toml | 1 + providers/deepinfra/models/zai-org/GLM-5.1.toml | 3 ++- providers/deepinfra/models/zai-org/GLM-5.toml | 3 ++- 80 files changed, 107 insertions(+), 55 deletions(-) diff --git a/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml b/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml index f66b57a4c..d8e56482b 100644 --- a/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml +++ b/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml @@ -18,4 +18,4 @@ output = 2048 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml index 21463f3e6..c0e928ab7 100644 --- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml +++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml @@ -1,3 +1,4 @@ +# 
https://deepinfra.com/MiniMaxAI/MiniMax-M2.1 name = "MiniMax M2.1" release_date = "2025-12-23" last_updated = "2025-12-23" @@ -16,7 +17,7 @@ cache_read = 0.14 [limit] context = 196_608 output = 196_608 - + [modalities] input = ["text"] output = ["text"] diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml index 0a3d9dcf8..f40361980 100644 --- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml +++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml @@ -24,3 +24,4 @@ output = ["text"] [interleaved] field = "reasoning_content" + diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml index 4a5cca112..333efc4b4 100644 --- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml +++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/MiniMaxAI/MiniMax-M2 name = "MiniMax M2" family = "minimax" release_date = "2025-11-13" @@ -24,3 +25,4 @@ output = 32_768 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml index dccf1e193..18cad0a2a 100644 --- a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml +++ b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml @@ -20,4 +20,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml index 3bb7f2a06..5936ea888 100644 --- a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml +++ b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml @@ -20,4 +20,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at 
end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml index e459c46a1..d88fd6fc9 100644 --- a/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml +++ b/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml @@ -20,4 +20,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-14B.toml b/providers/deepinfra/models/Qwen/Qwen3-14B.toml index 021b0fb44..50529e7f4 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-14B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-14B.toml @@ -20,4 +20,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml index 63d303946..f89d4830a 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml @@ -20,4 +20,4 @@ output = 81_920 [modalities] input = ["text", "image", "video"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml index ce52431dc..4d616a932 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml @@ -21,4 +21,4 @@ output = 81_920 [modalities] input = ["text", "image", "video"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml index 60209c9fd..362cc6cfe 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml +++ 
b/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml @@ -20,4 +20,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-32B.toml b/providers/deepinfra/models/Qwen/Qwen3-32B.toml index 8fb7ef139..d5a5040a0 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-32B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-32B.toml @@ -20,4 +20,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml index 1154bb779..8b1303f14 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo name = "Qwen3 Coder 480B A35B Instruct Turbo" family = "qwen" release_date = "2025-07-23" @@ -20,4 +21,4 @@ output = 66_536 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml index 08c0ed4ab..c556f98b8 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/Qwen/Qwen3-Coder-480B-A35B-Instruct name = "Qwen3 Coder 480B A35B Instruct" family = "qwen" release_date = "2025-07-23" @@ -19,4 +20,4 @@ output = 66_536 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml index 
abcf7d075..9bd4fa4f7 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml @@ -20,4 +20,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml index 67bed3f00..a1f5c5c71 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml @@ -21,4 +21,4 @@ output = 81_920 [modalities] input = ["text", "image", "video"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml index 0954ffa6a..e9277d8b3 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml @@ -20,4 +20,4 @@ output = 81_920 [modalities] input = ["text", "image", "video"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml index 4940cd7b6..afb332615 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml @@ -19,4 +19,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml index 4f3020eb7..6a8de84a4 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml @@ -20,4 +20,4 @@ output = 81_920 [modalities] input = ["text", "image", "video"] -output = 
["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml index 23b94b3de..7b0cae035 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml @@ -20,4 +20,4 @@ output = 81_920 [modalities] input = ["text", "image", "video"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml index 01cbc6d9a..8954a1c1a 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml @@ -19,4 +19,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml index e342e5b18..ee19fe8c4 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/Qwen/Qwen3.5-35B-A3B name = "Qwen 3.5 35B A3B" family = "qwen" release_date = "2026-02-01" @@ -20,3 +21,4 @@ output = 81_920 [modalities] input = ["text","image","video"] output = ["text"] + diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml index e961adf51..e5a826187 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/Qwen/Qwen3.5-397B-A17B name = "Qwen 3.5 397B A17B" family = "qwen" release_date = "2026-02-01" @@ -21,3 +22,4 @@ output = 81_920 [modalities] input = ["text","image","video"] output = ["text"] + diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml index 
7ed305694..831beb24f 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml @@ -19,4 +19,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml index 2320ecf02..4b5999d86 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml @@ -19,4 +19,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.6-27B.toml b/providers/deepinfra/models/Qwen/Qwen3.6-27B.toml index dab29ef0a..5540d5cd5 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.6-27B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.6-27B.toml @@ -20,4 +20,4 @@ output = 81_920 [modalities] input = ["text", "image", "video"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml index 5c1c94a60..7ffe7d817 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/Qwen/Qwen3.6-35B-A3B name = "Qwen3.6 35B A3B" family = "qwen" release_date = "2026-04-01" @@ -19,3 +20,4 @@ output = 81_920 [modalities] input = ["text", "image", "video"] output = ["text"] + diff --git a/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml b/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml index 266f6ea62..71ee291c8 100644 --- a/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml +++ b/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml @@ -19,4 +19,4 @@ output = 2048 [modalities] input = ["text"] -output = ["text"] \
No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml b/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml index cab06dcc8..791da9937 100644 --- a/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml +++ b/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml @@ -19,4 +19,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml b/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml index 28d93a4fb..605a835e0 100644 --- a/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml +++ b/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/anthropic/claude-3-7-sonnet-latest name = "Claude Sonnet 3.7 (Latest)" family = "claude-sonnet" release_date = "2025-03-13" @@ -21,3 +22,4 @@ output = 64_000 [modalities] input = ["text", "image"] output = ["text"] + diff --git a/providers/deepinfra/models/anthropic/claude-4-opus.toml b/providers/deepinfra/models/anthropic/claude-4-opus.toml index 51f441d78..007d325c2 100644 --- a/providers/deepinfra/models/anthropic/claude-4-opus.toml +++ b/providers/deepinfra/models/anthropic/claude-4-opus.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/anthropic/claude-4-opus name = "Claude Opus 4" family = "claude-opus" release_date = "2025-06-12" @@ -20,3 +21,4 @@ output = 32_000 [modalities] input = ["text", "image"] output = ["text"] + diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml index cb2d1cae7..949934ae1 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml @@ -19,4 +19,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No 
newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml index 2733a6938..40cb5a1b1 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-R1-0528 name = "DeepSeek-R1-0528" release_date = "2025-05-28" last_updated = "2025-05-28" @@ -23,3 +24,4 @@ output = 64_000 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml index 32b40c39a..13616d0bf 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml @@ -19,4 +19,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml index a290691b9..5e659a00e 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml @@ -21,4 +21,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml index 3f25ea8e7..12a439bbc 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml @@ -21,4 +21,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git 
a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml index 81118a4ff..e64e5eb3a 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml @@ -21,4 +21,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml index 74d5e7d60..954cef7a8 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V3.2 name = "DeepSeek-V3.2" release_date = "2025-12-02" last_updated = "2025-12-02" @@ -23,3 +24,4 @@ output = 64_000 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml index cad03c604..8acef8588 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml @@ -20,4 +20,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml index 4632e23cf..e1f00596c 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml @@ -20,4 +20,4 @@ output = 131_072 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml index e867b3a48..59057d4f0 100644 --- 
a/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V4-Pro attachment = false [extends] @@ -6,3 +7,4 @@ from = "deepseek/deepseek-v4-pro" [limit] context = 65_536 output = 65_536 + diff --git a/providers/deepinfra/models/google/gemma-3-12b-it.toml b/providers/deepinfra/models/google/gemma-3-12b-it.toml index d30ec29f7..4e54d7491 100644 --- a/providers/deepinfra/models/google/gemma-3-12b-it.toml +++ b/providers/deepinfra/models/google/gemma-3-12b-it.toml @@ -20,4 +20,4 @@ output = 8192 [modalities] input = ["text", "image"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-3-27b-it.toml b/providers/deepinfra/models/google/gemma-3-27b-it.toml index 4c4b9d3d0..394a82b5d 100644 --- a/providers/deepinfra/models/google/gemma-3-27b-it.toml +++ b/providers/deepinfra/models/google/gemma-3-27b-it.toml @@ -20,4 +20,4 @@ output = 8192 [modalities] input = ["text", "image"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-3-4b-it.toml b/providers/deepinfra/models/google/gemma-3-4b-it.toml index 6c7b160bf..6c57358d5 100644 --- a/providers/deepinfra/models/google/gemma-3-4b-it.toml +++ b/providers/deepinfra/models/google/gemma-3-4b-it.toml @@ -20,4 +20,4 @@ output = 8192 [modalities] input = ["text", "image"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml b/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml index a96231d05..71f2ae483 100644 --- a/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml +++ b/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml @@ -20,4 +20,4 @@ output = 16_384 [modalities] input = ["text", "image", "video"] -output = ["text"] \ No newline at end of file +output = ["text"] 
diff --git a/providers/deepinfra/models/google/gemma-4-31B-it.toml b/providers/deepinfra/models/google/gemma-4-31B-it.toml index 71180fef5..9c53c1d46 100644 --- a/providers/deepinfra/models/google/gemma-4-31B-it.toml +++ b/providers/deepinfra/models/google/gemma-4-31B-it.toml @@ -20,4 +20,4 @@ output = 16_384 [modalities] input = ["text", "image", "video"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml index 79e674844..b26fd8538 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-3.1-70B-Instruct-Turbo name = "Llama 3.1 70B Turbo" family = "llama" release_date = "2024-07-23" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml index 2edbcb221..9d7a096b1 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-3.1-70B-Instruct name = "Llama 3.1 70B" family = "llama" release_date = "2024-07-23" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml index 3646a2d47..a883556e7 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-3.1-8B-Instruct-Turbo name = 
"Llama 3.1 8B Turbo" family = "llama" release_date = "2024-07-23" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml index ec3539991..b2e9f62ee 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-3.1-8B-Instruct name = "Llama 3.1 8B" family = "llama" release_date = "2024-07-23" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml index 14039f65c..e5ec1eaf7 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml @@ -19,4 +19,4 @@ output = 4096 [modalities] input = ["text", "image"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml index df433558b..6903f0a56 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-3.3-70B-Instruct-Turbo name = "Llama 3.3 70B Turbo" family = "llama" release_date = "2024-12-06" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml b/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml index 1a2b88bbc..05e0d1afd 100644 --- 
a/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml +++ b/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 name = "Llama 4 Maverick 17B FP8" family = "llama" release_date = "2025-04-05" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text", "image"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml index 8f7bab10f..efcb012ef 100644 --- a/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml +++ b/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-4-Scout-17B-16E-Instruct name = "Llama 4 Scout 17B" family = "llama" release_date = "2025-04-05" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text", "image"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml b/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml index c2ff559fb..4f00ea1f7 100644 --- a/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml +++ b/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml @@ -19,4 +19,4 @@ output = 4096 [modalities] input = ["text", "image"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/microsoft/phi-4.toml b/providers/deepinfra/models/microsoft/phi-4.toml index 4413a7167..ea25cd4e6 100644 --- a/providers/deepinfra/models/microsoft/phi-4.toml +++ b/providers/deepinfra/models/microsoft/phi-4.toml @@ -20,4 +20,4 @@ output = 4096 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml 
b/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml index 55c02dcdd..e865afb64 100644 --- a/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml +++ b/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml @@ -20,4 +20,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml b/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml index 141e53388..b2dd4dd4b 100644 --- a/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml +++ b/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml @@ -20,4 +20,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml b/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml index d8c11caf1..4107b6d91 100644 --- a/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml +++ b/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml @@ -20,4 +20,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml b/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml index 180430bf1..6eadca87c 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml @@ -21,4 +21,4 @@ output = 262_144 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml b/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml index 285310a44..11a976885 100644 --- 
a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/moonshotai/Kimi-K2-Instruct name = "Kimi K2" family = "kimi" release_date = "2025-07-11" @@ -20,3 +21,4 @@ output = 32_768 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml b/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml index 4a3436a72..2a56b9671 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/moonshotai/Kimi-K2-Thinking name = "Kimi K2 Thinking" family = "kimi-thinking" release_date = "2025-11-06" @@ -24,3 +25,4 @@ output = 32_768 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml b/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml index c1d110370..7663f4983 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/moonshotai/Kimi-K2.5 name = "Kimi K2.5" family = "kimi" release_date = "2026-01-27" @@ -24,4 +25,4 @@ input = ["text", "image", "video"] output = ["text"] [interleaved] -field = "reasoning_content" \ No newline at end of file +field = "reasoning_content" diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2.6.toml b/providers/deepinfra/models/moonshotai/Kimi-K2.6.toml index ebcf7f7c9..16e6bdfc7 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2.6.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2.6.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/moonshotai/Kimi-K2.6 name = "Kimi K2.6" family = "kimi" release_date = "2026-04-21" @@ -26,3 +27,4 @@ output = 16_384 [modalities] input = ["text", "image", "video"] output = ["text"] + diff --git 
a/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml b/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml index 52f52fc2a..c1fa2f2fc 100644 --- a/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml +++ b/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml @@ -19,4 +19,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml index 1b9ee873b..9313244eb 100644 --- a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml +++ b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml @@ -19,4 +19,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml index a74190003..3f2950494 100644 --- a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml +++ b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml @@ -18,4 +18,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml index 539125159..0c0448b6e 100644 --- a/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml +++ b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml @@ -19,4 +19,4 @@ output = 81_920 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml 
b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml index 5224a123b..c7378fdc9 100644 --- a/providers/deepinfra/models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml +++ b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml @@ -21,4 +21,4 @@ output = 81_920 [modalities] input = ["text", "image", "video", "audio"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml b/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml index 095afb11d..2cc256b55 100644 --- a/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml +++ b/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml @@ -19,4 +19,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/openai/gpt-oss-120b.toml b/providers/deepinfra/models/openai/gpt-oss-120b.toml index 0889e79c2..5243442c0 100644 --- a/providers/deepinfra/models/openai/gpt-oss-120b.toml +++ b/providers/deepinfra/models/openai/gpt-oss-120b.toml @@ -22,3 +22,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/openai/gpt-oss-20b.toml b/providers/deepinfra/models/openai/gpt-oss-20b.toml index 9342fee14..7a0c249c4 100644 --- a/providers/deepinfra/models/openai/gpt-oss-20b.toml +++ b/providers/deepinfra/models/openai/gpt-oss-20b.toml @@ -22,3 +22,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml b/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml index 80e947033..19b280c7b 100644 --- a/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml +++ b/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml @@ -23,4 +23,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git 
a/providers/deepinfra/models/zai-org/GLM-4.5.toml b/providers/deepinfra/models/zai-org/GLM-4.5.toml index ae5ae7ec1..0bff83456 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.5.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.5.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/zai-org/GLM-4.5 name = "GLM-4.5" family = "glm" release_date = "2025-07-28" @@ -24,3 +25,4 @@ output = 98_304 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/zai-org/GLM-4.6.toml b/providers/deepinfra/models/zai-org/GLM-4.6.toml index 63eb5b3f7..13d75372f 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.6.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.6.toml @@ -24,4 +24,4 @@ output = 131_072 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-4.6V.toml b/providers/deepinfra/models/zai-org/GLM-4.6V.toml index b3f6ce288..59db6966e 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.6V.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.6V.toml @@ -23,4 +23,4 @@ output = 131_072 [modalities] input = ["text", "image"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml b/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml index dbb42a2d8..784459f81 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml @@ -25,3 +25,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/zai-org/GLM-4.7.toml b/providers/deepinfra/models/zai-org/GLM-4.7.toml index 47dc4f9af..fa985538c 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.7.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.7.toml @@ -26,3 +26,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/zai-org/GLM-5.1.toml 
b/providers/deepinfra/models/zai-org/GLM-5.1.toml index 1b901d25e..286d0c14a 100644 --- a/providers/deepinfra/models/zai-org/GLM-5.1.toml +++ b/providers/deepinfra/models/zai-org/GLM-5.1.toml @@ -20,10 +20,11 @@ output = 4.4 cache_read = 0.26 [limit] -context = 202_752 +context = 202_752 # https://deepinfra.com/docs/advanced/max_tokens_limit output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/zai-org/GLM-5.toml b/providers/deepinfra/models/zai-org/GLM-5.toml index de1007aa8..334829a39 100644 --- a/providers/deepinfra/models/zai-org/GLM-5.toml +++ b/providers/deepinfra/models/zai-org/GLM-5.toml @@ -19,10 +19,11 @@ output = 2.08 cache_read = 0.12 [limit] -context = 202_752 +context = 202_752 # https://deepinfra.com/docs/advanced/max_tokens_limit output = 16_384 [modalities] input = ["text"] output = ["text"] + From d8a5369f50b9838f3ad5e113a9ac66b73a01e712 Mon Sep 17 00:00:00 2001 From: RioPlay Date: Sat, 2 May 2026 02:08:04 -0500 Subject: [PATCH 4/9] fix(deepinfra): correct pricing for GLM-5.1 and add missing cache_read for GLM-4.7-Flash --- providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml | 1 + providers/deepinfra/models/zai-org/GLM-5.1.toml | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml b/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml index 784459f81..bbc1835a1 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml @@ -16,6 +16,7 @@ field = "reasoning_content" [cost] input = 0.06 output = 0.40 +cache_read = 0.01 [limit] context = 202_752 diff --git a/providers/deepinfra/models/zai-org/GLM-5.1.toml b/providers/deepinfra/models/zai-org/GLM-5.1.toml index 286d0c14a..3f0e7d742 100644 --- a/providers/deepinfra/models/zai-org/GLM-5.1.toml +++ b/providers/deepinfra/models/zai-org/GLM-5.1.toml @@ -15,9 +15,9 @@ structured_output = true field = 
"reasoning_content" [cost] -input = 1.4 -output = 4.4 -cache_read = 0.26 +input = 1.05 +output = 3.50 +cache_read = 0.205 [limit] context = 202_752 From b3ffde6261f7145cf0032c1208bf32106c49db8f Mon Sep 17 00:00:00 2001 From: RioPlay Date: Sat, 2 May 2026 02:08:08 -0500 Subject: [PATCH 5/9] =?UTF-8?q?fix(deepinfra):=20correct=20pricing=20for?= =?UTF-8?q?=20GLM-4.7=20(input=200.43=E2=86=920.40)=20and=20gpt-oss-120b?= =?UTF-8?q?=20(0.05=E2=86=920.039,=200.24=E2=86=920.19)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- providers/deepinfra/models/openai/gpt-oss-120b.toml | 4 ++-- providers/deepinfra/models/zai-org/GLM-4.7.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/providers/deepinfra/models/openai/gpt-oss-120b.toml b/providers/deepinfra/models/openai/gpt-oss-120b.toml index 5243442c0..d548bfcfa 100644 --- a/providers/deepinfra/models/openai/gpt-oss-120b.toml +++ b/providers/deepinfra/models/openai/gpt-oss-120b.toml @@ -11,8 +11,8 @@ tool_call = true open_weights = true [cost] -input = 0.05 -output = 0.24 +input = 0.039 +output = 0.19 [limit] context = 131_072 diff --git a/providers/deepinfra/models/zai-org/GLM-4.7.toml b/providers/deepinfra/models/zai-org/GLM-4.7.toml index fa985538c..cf984ad11 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.7.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.7.toml @@ -14,7 +14,7 @@ open_weights = true field = "reasoning_content" [cost] -input = 0.43 +input = 0.40 output = 1.75 cache_read = 0.08 From fdf26decfad55d54ddd263e45be952ee35fdeb72 Mon Sep 17 00:00:00 2001 From: RioPlay Date: Sat, 2 May 2026 02:08:13 -0500 Subject: [PATCH 6/9] fix(deepinfra): restore missing cache_read for MiniMax-M2.5 --- providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml index f40361980..fb5ba5865 100644 --- 
a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml +++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml @@ -13,6 +13,7 @@ open_weights = true [cost] input = 0.15 output = 1.15 +cache_read = 0.03 [limit] context = 204_800 From baaa68a2f63c7329738ee8ea1bc358d0621a733f Mon Sep 17 00:00:00 2001 From: RioPlay Date: Sat, 2 May 2026 02:16:11 -0500 Subject: [PATCH 7/9] fix(deepinfra): add missing tool_call field to MythoMax-L2-13b --- providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml b/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml index d8e56482b..0bb65efd7 100644 --- a/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml +++ b/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml @@ -6,6 +6,7 @@ last_updated = "2023-11-01" attachment = false reasoning = false temperature = true +tool_call = true open_weights = true [cost] From e7cff4108759023119cbc0f88c06291cf2629f08 Mon Sep 17 00:00:00 2001 From: RioPlay Date: Sat, 2 May 2026 02:18:35 -0500 Subject: [PATCH 8/9] fix(deepinfra): add missing tool_call to 3 models --- .../deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml | 1 + .../models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml | 1 + .../deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml | 1 + 3 files changed, 3 insertions(+) diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml index 949934ae1..07dbecc67 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml @@ -6,6 +6,7 @@ last_updated = "2025-05-28" attachment = false reasoning = true temperature = true +tool_call = true knowledge = "2024-07" open_weights = false diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml 
b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml index 13616d0bf..fc563d534 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml @@ -6,6 +6,7 @@ last_updated = "2025-02-01" attachment = false reasoning = true temperature = true +tool_call = true knowledge = "2024-10" open_weights = true diff --git a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml index 3f2950494..38a0aab72 100644 --- a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml +++ b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml @@ -6,6 +6,7 @@ last_updated = "2026-02-01" attachment = false reasoning = true temperature = true +tool_call = true open_weights = true [cost] From 14a53aefef01b982dbec348ae59b8d5cac756fe7 Mon Sep 17 00:00:00 2001 From: RioPlay Date: Sat, 2 May 2026 02:24:19 -0500 Subject: [PATCH 9/9] revert: restore vercel provider files to original state --- providers/vercel/models/deepseek/deepseek-v3.2-exp.toml | 2 +- providers/vercel/models/zai/glm-4.6.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/providers/vercel/models/deepseek/deepseek-v3.2-exp.toml b/providers/vercel/models/deepseek/deepseek-v3.2-exp.toml index 9527b74b0..cccb3ccf8 100644 --- a/providers/vercel/models/deepseek/deepseek-v3.2-exp.toml +++ b/providers/vercel/models/deepseek/deepseek-v3.2-exp.toml @@ -12,7 +12,7 @@ open_weights = false [cost] input = 0.27 output = 0.4 -cache_read = 0.27 +cached_input = 0.27 [limit] context = 163_840 diff --git a/providers/vercel/models/zai/glm-4.6.toml b/providers/vercel/models/zai/glm-4.6.toml index d43143488..82a689774 100644 --- a/providers/vercel/models/zai/glm-4.6.toml +++ b/providers/vercel/models/zai/glm-4.6.toml @@ -14,7 +14,7 @@ interleaved = true [cost] input = 0.45 output = 1.8 -cache_read = 0.45 
+cached_input = 0.45 [limit] context = 200_000