From 7670a1fc8a4e265fbda4335bd66784f259444f88 Mon Sep 17 00:00:00 2001 From: RioPlay Date: Sat, 2 May 2026 02:07:44 -0500 Subject: [PATCH 1/9] chore(deepinfra): add 34 new models and update pricing on 6 existing - Update pricing on GLM-5, MiniMax-M2.5, Kimi-K2.5, Qwen3.5-35B-A3B, Qwen3.6-35B-A3B, Qwen3-Coder-480B-A35B-Instruct-Turbo to match deepinfra.com current rates - Add 5 nvidia models (Nemotron 3 series) - Add 7 deepseek-ai models (V4-Flash, V3 variants, R1 variants) - Add 5 google models (gemma 4 and gemma 3 series) - Add 13 Qwen models (Qwen3.5, Qwen3.6, Qwen3, Qwen3-VL, Qwen2.5) - Add Step-3.5-Flash, 2 meta-llama, 1 phi-4, 3 mistralai - Add gpt-oss-120b-Turbo, 2 NousResearch, Gryphe, 2 Sao10K --- .../models/Gryphe/MythoMax-L2-13b.toml | 21 +++++++++++++++ .../models/MiniMaxAI/MiniMax-M2.5.toml | 6 ++--- .../NousResearch/Hermes-3-Llama-3.1-405B.toml | 23 ++++++++++++++++ .../NousResearch/Hermes-3-Llama-3.1-70B.toml | 23 ++++++++++++++++ .../models/Qwen/Qwen2.5-72B-Instruct.toml | 23 ++++++++++++++++ .../deepinfra/models/Qwen/Qwen3-14B.toml | 23 ++++++++++++++++ .../Qwen/Qwen3-235B-A22B-Instruct-2507.toml | 23 ++++++++++++++++ .../Qwen/Qwen3-235B-A22B-Thinking-2507.toml | 24 +++++++++++++++++ .../deepinfra/models/Qwen/Qwen3-30B-A3B.toml | 23 ++++++++++++++++ .../deepinfra/models/Qwen/Qwen3-32B.toml | 23 ++++++++++++++++ .../Qwen3-Coder-480B-A35B-Instruct-Turbo.toml | 5 ++-- .../Qwen/Qwen3-Next-80B-A3B-Instruct.toml | 23 ++++++++++++++++ .../Qwen/Qwen3-VL-235B-A22B-Instruct.toml | 24 +++++++++++++++++ .../Qwen/Qwen3-VL-30B-A3B-Instruct.toml | 23 ++++++++++++++++ .../deepinfra/models/Qwen/Qwen3.5-0.8B.toml | 22 ++++++++++++++++ .../models/Qwen/Qwen3.5-122B-A10B.toml | 23 ++++++++++++++++ .../deepinfra/models/Qwen/Qwen3.5-27B.toml | 23 ++++++++++++++++ .../deepinfra/models/Qwen/Qwen3.5-2B.toml | 22 ++++++++++++++++ .../models/Qwen/Qwen3.5-35B-A3B.toml | 5 ++-- .../deepinfra/models/Qwen/Qwen3.5-4B.toml | 22 ++++++++++++++++ 
.../deepinfra/models/Qwen/Qwen3.5-9B.toml | 22 ++++++++++++++++ .../deepinfra/models/Qwen/Qwen3.6-27B.toml | 23 ++++++++++++++++ .../models/Qwen/Qwen3.6-35B-A3B.toml | 4 +-- .../models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml | 22 ++++++++++++++++ .../models/Sao10K/L3.1-70B-Euryale-v2.2.toml | 22 ++++++++++++++++ .../deepseek-ai/DeepSeek-R1-0528-Turbo.toml | 22 ++++++++++++++++ .../DeepSeek-R1-Distill-Llama-70B.toml | 22 ++++++++++++++++ .../models/deepseek-ai/DeepSeek-V3-0324.toml | 24 +++++++++++++++++ .../deepseek-ai/DeepSeek-V3.1-Terminus.toml | 24 +++++++++++++++++ .../models/deepseek-ai/DeepSeek-V3.1.toml | 24 +++++++++++++++++ .../models/deepseek-ai/DeepSeek-V3.toml | 23 ++++++++++++++++ .../models/deepseek-ai/DeepSeek-V4-Flash.toml | 23 ++++++++++++++++ .../models/google/gemma-3-12b-it.toml | 23 ++++++++++++++++ .../models/google/gemma-3-27b-it.toml | 23 ++++++++++++++++ .../models/google/gemma-3-4b-it.toml | 23 ++++++++++++++++ .../models/google/gemma-4-26B-A4B-it.toml | 23 ++++++++++++++++ .../models/google/gemma-4-31B-it.toml | 23 ++++++++++++++++ .../Llama-3.2-11B-Vision-Instruct.toml | 22 ++++++++++++++++ .../models/meta-llama/Llama-Guard-4-12B.toml | 22 ++++++++++++++++ .../deepinfra/models/microsoft/phi-4.toml | 23 ++++++++++++++++ .../mistralai/Mistral-Nemo-Instruct-2407.toml | 23 ++++++++++++++++ .../Mistral-Small-24B-Instruct-2501.toml | 23 ++++++++++++++++ .../Mistral-Small-3.2-24B-Instruct-2506.toml | 23 ++++++++++++++++ .../models/moonshotai/Kimi-K2.5.toml | 6 ++--- .../Llama-3.3-Nemotron-Super-49B-v1.5.toml | 22 ++++++++++++++++ .../NVIDIA-Nemotron-3-Super-120B-A12B.toml | 22 ++++++++++++++++ .../nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml | 21 +++++++++++++++ .../nvidia/Nemotron-3-Nano-30B-A3B.toml | 22 ++++++++++++++++ ...emotron-3-Nano-Omni-30B-A3B-Reasoning.toml | 24 +++++++++++++++++ .../models/openai/gpt-oss-120b-Turbo.toml | 22 ++++++++++++++++ .../models/stepfun-ai/Step-3.5-Flash.toml | 26 +++++++++++++++++++ 
.../deepinfra/models/zai-org/GLM-4.7.toml | 2 +- providers/deepinfra/models/zai-org/GLM-5.toml | 6 ++--- 53 files changed, 1065 insertions(+), 18 deletions(-) create mode 100644 providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml create mode 100644 providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml create mode 100644 providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-14B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-32B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-27B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-2B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-4B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-9B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.6-27B.toml create mode 100644 providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml create mode 100644 providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml create mode 100644 
providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml create mode 100644 providers/deepinfra/models/google/gemma-3-12b-it.toml create mode 100644 providers/deepinfra/models/google/gemma-3-27b-it.toml create mode 100644 providers/deepinfra/models/google/gemma-3-4b-it.toml create mode 100644 providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml create mode 100644 providers/deepinfra/models/google/gemma-4-31B-it.toml create mode 100644 providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml create mode 100644 providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml create mode 100644 providers/deepinfra/models/microsoft/phi-4.toml create mode 100644 providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml create mode 100644 providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml create mode 100644 providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml create mode 100644 providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml create mode 100644 providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml create mode 100644 providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml create mode 100644 providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml create mode 100644 providers/deepinfra/models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml create mode 100644 providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml create mode 100644 providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml diff --git a/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml b/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml new 
file mode 100644 index 000000000..f66b57a4c --- /dev/null +++ b/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml @@ -0,0 +1,21 @@ +# https://deepinfra.com/Gryphe/MythoMax-L2-13b +name = "MythoMax L2 13B" +family = "llama" +release_date = "2023-11-01" +last_updated = "2023-11-01" +attachment = false +reasoning = false +temperature = true +open_weights = true + +[cost] +input = 0.40 +output = 0.40 + +[limit] +context = 4096 +output = 2048 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml index bbbdbe470..0a3d9dcf8 100644 --- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml +++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml @@ -11,10 +11,8 @@ knowledge = "2025-06" open_weights = true [cost] -input = 0.27 -output = 0.95 -cache_read = 0.03 -cache_write = 0.375 +input = 0.15 +output = 1.15 [limit] context = 204_800 diff --git a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml new file mode 100644 index 000000000..dccf1e193 --- /dev/null +++ b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/NousResearch/Hermes-3-Llama-3.1-405B +name = "Hermes 3 Llama 3.1 405B" +family = "hermes" +release_date = "2024-10-01" +last_updated = "2024-10-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-06" +open_weights = true + +[cost] +input = 1.00 +output = 1.00 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml new file mode 100644 index 000000000..3bb7f2a06 --- /dev/null +++ 
b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/NousResearch/Hermes-3-Llama-3.1-70B +name = "Hermes 3 Llama 3.1 70B" +family = "hermes" +release_date = "2024-10-01" +last_updated = "2024-10-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-06" +open_weights = true + +[cost] +input = 0.30 +output = 0.30 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml new file mode 100644 index 000000000..e459c46a1 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen2.5-72B-Instruct +name = "Qwen2.5 72B Instruct" +family = "qwen" +release_date = "2024-12-01" +last_updated = "2024-12-01" +attachment = false +reasoning = false +temperature = true +tool_call = true +knowledge = "2024-06" +open_weights = true + +[cost] +input = 0.36 +output = 0.40 + +[limit] +context = 32_768 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3-14B.toml b/providers/deepinfra/models/Qwen/Qwen3-14B.toml new file mode 100644 index 000000000..021b0fb44 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-14B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-14B +name = "Qwen3 14B" +family = "qwen" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.12 +output = 0.24 + +[limit] +context = 40_960 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git 
a/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml new file mode 100644 index 000000000..63d303946 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-235B-A22B-Instruct-2507 +name = "Qwen3 235B A22B Instruct 2507" +family = "qwen" +release_date = "2025-07-01" +last_updated = "2025-07-01" +attachment = true +reasoning = false +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.071 +output = 0.10 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml new file mode 100644 index 000000000..ce52431dc --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/Qwen/Qwen3-235B-A22B-Thinking-2507 +name = "Qwen3 235B A22B Thinking 2507" +family = "qwen" +release_date = "2025-07-01" +last_updated = "2025-07-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.23 +output = 2.30 +cache_read = 0.20 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml new file mode 100644 index 000000000..60209c9fd --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-30B-A3B +name = "Qwen3 30B A3B" +family = "qwen" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = false +reasoning = 
true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.08 +output = 0.28 + +[limit] +context = 40_960 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3-32B.toml b/providers/deepinfra/models/Qwen/Qwen3-32B.toml new file mode 100644 index 000000000..8fb7ef139 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-32B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-32B +name = "Qwen3 32B" +family = "qwen" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.08 +output = 0.28 + +[limit] +context = 40_960 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml index 92f8cb540..1154bb779 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml @@ -10,8 +10,9 @@ tool_call = true open_weights = true [cost] -input = 0.3 -output = 1.2 +input = 0.30 +output = 1.00 +cache_read = 0.10 [limit] context = 262_144 diff --git a/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml new file mode 100644 index 000000000..abcf7d075 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-Next-80B-A3B-Instruct +name = "Qwen3 Next 80B A3B Instruct" +family = "qwen" +release_date = "2026-03-01" +last_updated = "2026-03-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-01" 
+open_weights = true + +[cost] +input = 0.09 +output = 1.10 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml new file mode 100644 index 000000000..67bed3f00 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/Qwen/Qwen3-VL-235B-A22B-Instruct +name = "Qwen3 VL 235B A22B Instruct" +family = "qwen" +release_date = "2025-05-01" +last_updated = "2025-05-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.20 +output = 0.88 +cache_read = 0.11 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml new file mode 100644 index 000000000..0954ffa6a --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-VL-30B-A3B-Instruct +name = "Qwen3 VL 30B A3B Instruct" +family = "qwen" +release_date = "2025-05-01" +last_updated = "2025-05-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.15 +output = 0.60 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml new file mode 100644 index 000000000..4940cd7b6 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml @@ -0,0 +1,22 @@ +# 
https://deepinfra.com/Qwen/Qwen3.5-0.8B +name = "Qwen3.5 0.8B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.01 +output = 0.05 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml new file mode 100644 index 000000000..4f3020eb7 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3.5-122B-A10B +name = "Qwen3.5 122B A10B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.29 +output = 2.40 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml new file mode 100644 index 000000000..23b94b3de --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3.5-27B +name = "Qwen3.5 27B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.26 +output = 2.60 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml new file mode 100644 index 000000000..01cbc6d9a --- /dev/null +++ 
b/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/Qwen/Qwen3.5-2B +name = "Qwen3.5 2B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.02 +output = 0.10 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml index 10dbaf490..e342e5b18 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml @@ -10,9 +10,8 @@ tool_call = true open_weights = true [cost] -input = 0.2 -output = 0.95 -cached = 0.1 +input = 0.18 +output = 1.00 [limit] context = 262_144 diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml new file mode 100644 index 000000000..7ed305694 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/Qwen/Qwen3.5-4B +name = "Qwen3.5 4B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.03 +output = 0.15 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml new file mode 100644 index 000000000..2320ecf02 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/Qwen/Qwen3.5-9B +name = "Qwen3.5 9B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = false +reasoning = true +temperature = true 
+tool_call = true +open_weights = true + +[cost] +input = 0.04 +output = 0.15 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3.6-27B.toml b/providers/deepinfra/models/Qwen/Qwen3.6-27B.toml new file mode 100644 index 000000000..dab29ef0a --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.6-27B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3.6-27B +name = "Qwen3.6 27B" +family = "qwen3.6" +release_date = "2026-04-01" +last_updated = "2026-04-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.32 +output = 3.20 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml index 5a42cc4e8..5c1c94a60 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml @@ -9,8 +9,8 @@ tool_call = true open_weights = true [cost] -input = 0.20 -output = 1.00 +input = 0.15 +output = 0.95 [limit] context = 262_144 diff --git a/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml b/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml new file mode 100644 index 000000000..266f6ea62 --- /dev/null +++ b/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/Sao10K/L3-8B-Lunaris-v1-Turbo +name = "L3 8B Lunaris v1 Turbo" +family = "llama" +release_date = "2024-09-01" +last_updated = "2024-09-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.04 +output = 0.05 + +[limit] +context = 8192 +output = 2048 + +[modalities] +input = ["text"] +output = ["text"] \ 
No newline at end of file diff --git a/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml b/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml new file mode 100644 index 000000000..cab06dcc8 --- /dev/null +++ b/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/Sao10K/L3.1-70B-Euryale-v2.2 +name = "L3.1 70B Euryale v2.2" +family = "llama" +release_date = "2024-11-01" +last_updated = "2024-11-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.85 +output = 0.85 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml new file mode 100644 index 000000000..cb2d1cae7 --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-R1-0528-Turbo +name = "DeepSeek R1 0528 Turbo" +family = "deepseek-thinking" +release_date = "2025-05-28" +last_updated = "2025-05-28" +attachment = false +reasoning = true +temperature = true +knowledge = "2024-07" +open_weights = false + +[cost] +input = 1.00 +output = 3.00 + +[limit] +context = 32_768 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml new file mode 100644 index 000000000..32b40c39a --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-R1-Distill-Llama-70B +name = "DeepSeek R1 Distill Llama 70B" +family = "deepseek-thinking" +release_date = "2025-02-01" +last_updated = 
"2025-02-01" +attachment = false +reasoning = true +temperature = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.70 +output = 0.80 + +[limit] +context = 131_072 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml new file mode 100644 index 000000000..a290691b9 --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V3-0324 +name = "DeepSeek V3 0324" +family = "deepseek" +release_date = "2025-03-24" +last_updated = "2025-03-24" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-12" +open_weights = false + +[cost] +input = 0.20 +output = 0.77 +cache_read = 0.135 + +[limit] +context = 163_840 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml new file mode 100644 index 000000000..3f25ea8e7 --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V3.1-Terminus +name = "DeepSeek V3.1 Terminus" +family = "deepseek" +release_date = "2026-01-01" +last_updated = "2026-01-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-12" +open_weights = false + +[cost] +input = 0.21 +output = 0.79 +cache_read = 0.13 + +[limit] +context = 163_840 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml new file mode 100644 index 000000000..81118a4ff --- /dev/null 
+++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V3.1 +name = "DeepSeek V3.1" +family = "deepseek" +release_date = "2025-12-01" +last_updated = "2025-12-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-12" +open_weights = false + +[cost] +input = 0.21 +output = 0.79 +cache_read = 0.13 + +[limit] +context = 163_840 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml new file mode 100644 index 000000000..cad03c604 --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V3 +name = "DeepSeek V3" +family = "deepseek" +release_date = "2025-11-01" +last_updated = "2025-11-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-12" +open_weights = false + +[cost] +input = 0.32 +output = 0.89 + +[limit] +context = 163_840 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml new file mode 100644 index 000000000..4632e23cf --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V4-Flash +name = "DeepSeek V4 Flash" +family = "deepseek-flash" +release_date = "2026-04-01" +last_updated = "2026-04-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.14 +output = 0.28 +cache_read = 0.028 + +[limit] +context = 1_048_576 +output = 131_072 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git 
a/providers/deepinfra/models/google/gemma-3-12b-it.toml b/providers/deepinfra/models/google/gemma-3-12b-it.toml new file mode 100644 index 000000000..d30ec29f7 --- /dev/null +++ b/providers/deepinfra/models/google/gemma-3-12b-it.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/google/gemma-3-12b-it +name = "Gemma 3 12B IT" +family = "gemma" +release_date = "2025-02-01" +last_updated = "2025-02-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.04 +output = 0.13 + +[limit] +context = 131_072 +output = 8192 + +[modalities] +input = ["text", "image"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/google/gemma-3-27b-it.toml b/providers/deepinfra/models/google/gemma-3-27b-it.toml new file mode 100644 index 000000000..4c4b9d3d0 --- /dev/null +++ b/providers/deepinfra/models/google/gemma-3-27b-it.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/google/gemma-3-27b-it +name = "Gemma 3 27B IT" +family = "gemma" +release_date = "2025-02-01" +last_updated = "2025-02-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.08 +output = 0.16 + +[limit] +context = 131_072 +output = 8192 + +[modalities] +input = ["text", "image"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/google/gemma-3-4b-it.toml b/providers/deepinfra/models/google/gemma-3-4b-it.toml new file mode 100644 index 000000000..6c7b160bf --- /dev/null +++ b/providers/deepinfra/models/google/gemma-3-4b-it.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/google/gemma-3-4b-it +name = "Gemma 3 4B IT" +family = "gemma" +release_date = "2025-02-01" +last_updated = "2025-02-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.04 +output = 0.08 + +[limit] +context = 131_072 +output = 
8192 + +[modalities] +input = ["text", "image"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml b/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml new file mode 100644 index 000000000..a96231d05 --- /dev/null +++ b/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/google/gemma-4-26B-A4B-it +name = "Gemma 4 26B A4B IT" +family = "gemma" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.07 +output = 0.34 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/google/gemma-4-31B-it.toml b/providers/deepinfra/models/google/gemma-4-31B-it.toml new file mode 100644 index 000000000..71180fef5 --- /dev/null +++ b/providers/deepinfra/models/google/gemma-4-31B-it.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/google/gemma-4-31B-it +name = "Gemma 4 31B IT" +family = "gemma" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.13 +output = 0.38 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml new file mode 100644 index 000000000..14039f65c --- /dev/null +++ b/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/meta-llama/Llama-3.2-11B-Vision-Instruct +name = "Llama 3.2 11B Vision Instruct" +family = "llama" +release_date 
= "2024-10-01" +last_updated = "2024-10-01" +attachment = true +reasoning = false +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.245 +output = 0.245 + +[limit] +context = 131_072 +output = 4096 + +[modalities] +input = ["text", "image"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml b/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml new file mode 100644 index 000000000..c2ff559fb --- /dev/null +++ b/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/meta-llama/Llama-Guard-4-12B +name = "Llama Guard 4 12B" +family = "llama" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = false +reasoning = false +temperature = false +tool_call = false +open_weights = true + +[cost] +input = 0.18 +output = 0.18 + +[limit] +context = 163_840 +output = 4096 + +[modalities] +input = ["text", "image"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/microsoft/phi-4.toml b/providers/deepinfra/models/microsoft/phi-4.toml new file mode 100644 index 000000000..4413a7167 --- /dev/null +++ b/providers/deepinfra/models/microsoft/phi-4.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/microsoft/phi-4 +name = "Phi 4" +family = "phi" +release_date = "2024-12-01" +last_updated = "2024-12-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-06" +open_weights = true + +[cost] +input = 0.07 +output = 0.14 + +[limit] +context = 16_384 +output = 4096 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml b/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml new file mode 100644 index 000000000..55c02dcdd --- /dev/null +++ b/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml @@ -0,0 +1,23 @@
+# https://deepinfra.com/mistralai/Mistral-Nemo-Instruct-2407 +name = "Mistral Nemo Instruct 2407" +family = "mistral-nemo" +release_date = "2024-07-01" +last_updated = "2024-07-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-06" +open_weights = true + +[cost] +input = 0.02 +output = 0.04 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml b/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml new file mode 100644 index 000000000..141e53388 --- /dev/null +++ b/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/mistralai/Mistral-Small-24B-Instruct-2501 +name = "Mistral Small 24B Instruct 2501" +family = "mistral-small" +release_date = "2025-01-01" +last_updated = "2025-01-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.05 +output = 0.08 + +[limit] +context = 32_768 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml b/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml new file mode 100644 index 000000000..d8c11caf1 --- /dev/null +++ b/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/mistralai/Mistral-Small-3.2-24B-Instruct-2506 +name = "Mistral Small 3.2 24B Instruct 2506" +family = "mistral-small" +release_date = "2025-06-01" +last_updated = "2025-06-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.075 +output = 0.20 + +[limit] +context = 131_072 
+output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml b/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml index 84183d853..c1d110370 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml @@ -11,9 +11,9 @@ knowledge = "2025-01" open_weights = true [cost] -input = 0.50 -output = 2.80 -cached_input = 0.09 +input = 0.45 +output = 2.25 +cache_read = 0.07 [limit] context = 262_144 diff --git a/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml b/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml new file mode 100644 index 000000000..52f52fc2a --- /dev/null +++ b/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5 +name = "Llama 3.3 Nemotron Super 49B v1.5" +family = "nemotron" +release_date = "2025-08-01" +last_updated = "2025-08-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.10 +output = 0.40 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml new file mode 100644 index 000000000..1b9ee873b --- /dev/null +++ b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B +name = "Nemotron 3 Super 120B A12B" +family = "nemotron" +release_date = "2025-10-01" +last_updated = "2025-10-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.10 +output = 0.50 + +[limit] +context = 
262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml new file mode 100644 index 000000000..a74190003 --- /dev/null +++ b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml @@ -0,0 +1,21 @@ +# https://deepinfra.com/nvidia/NVIDIA-Nemotron-Nano-9B-v2 +name = "Nemotron Nano 9B v2" +family = "nemotron" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = false +reasoning = true +temperature = true +open_weights = true + +[cost] +input = 0.04 +output = 0.16 + +[limit] +context = 131_072 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml new file mode 100644 index 000000000..539125159 --- /dev/null +++ b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/nvidia/Nemotron-3-Nano-30B-A3B +name = "Nemotron 3 Nano 30B A3B" +family = "nemotron" +release_date = "2026-03-01" +last_updated = "2026-03-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.05 +output = 0.20 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml new file mode 100644 index 000000000..5224a123b --- /dev/null +++ b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning +name = "Nemotron 3 Nano Omni 30B A3B Reasoning" +family 
= "nemotron" +release_date = "2026-04-01" +last_updated = "2026-04-01" +attachment = true +reasoning = true +structured_output = true +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.20 +output = 0.80 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video", "audio"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml b/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml new file mode 100644 index 000000000..095afb11d --- /dev/null +++ b/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/openai/gpt-oss-120b-Turbo +name = "GPT OSS 120B Turbo" +family = "gpt-oss" +release_date = "2025-09-01" +last_updated = "2025-09-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.15 +output = 0.60 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml b/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml new file mode 100644 index 000000000..80e947033 --- /dev/null +++ b/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml @@ -0,0 +1,26 @@ +# https://deepinfra.com/stepfun-ai/Step-3.5-Flash +name = "Step 3.5 Flash" +family = "step" +release_date = "2025-12-01" +last_updated = "2025-12-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[interleaved] +field = "reasoning_content" + +[cost] +input = 0.10 +output = 0.30 +cache_read = 0.02 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/deepinfra/models/zai-org/GLM-4.7.toml b/providers/deepinfra/models/zai-org/GLM-4.7.toml index 76b66a0c5..47dc4f9af 
100644 --- a/providers/deepinfra/models/zai-org/GLM-4.7.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.7.toml @@ -19,7 +19,7 @@ output = 1.75 cache_read = 0.08 [limit] -context = 202_752 +context = 202_752 # https://deepinfra.com/docs/advanced/max_tokens_limit output = 16_384 diff --git a/providers/deepinfra/models/zai-org/GLM-5.toml b/providers/deepinfra/models/zai-org/GLM-5.toml index 257b2c673..de1007aa8 100644 --- a/providers/deepinfra/models/zai-org/GLM-5.toml +++ b/providers/deepinfra/models/zai-org/GLM-5.toml @@ -14,9 +14,9 @@ open_weights = true field = "reasoning_content" [cost] -input = 0.8 -output = 2.56 -cache_read = 0.16 +input = 0.60 +output = 2.08 +cache_read = 0.12 [limit] context = 202_752 From 2070df62c2ef55f2e3a556798f4e2dc912cb7779 Mon Sep 17 00:00:00 2001 From: RioPlay Date: Sat, 2 May 2026 02:07:50 -0500 Subject: [PATCH 2/9] fix(deepinfra): correct cache pricing field names (cache_read) across all providers --- providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml | 2 +- providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml | 2 +- providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml | 2 +- providers/vercel/models/deepseek/deepseek-v3.2-exp.toml | 2 +- providers/vercel/models/zai/glm-4.6.toml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml index a0b0ac5d7..21463f3e6 100644 --- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml +++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml @@ -11,7 +11,7 @@ knowledge = "2025-06" [cost] input = 0.28 output = 1.20 -cached_read = 0.14 +cache_read = 0.14 [limit] context = 196_608 diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml index e726226c7..4a5cca112 100644 --- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml 
+++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml @@ -15,7 +15,7 @@ field = "reasoning_content" [cost] input = 0.254 output = 1.02 -cached_input = 0.127 +cache_read = 0.127 [limit] context = 262_144 diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml index 05a7a0686..e961adf51 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml @@ -12,7 +12,7 @@ open_weights = true [cost] input = 0.54 output = 3.4 -cached = 0.27 +cache_read = 0.27 [limit] context = 262_144 diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml b/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml index b455e5568..4a3436a72 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml @@ -15,7 +15,7 @@ field = "reasoning_content" [cost] input = 0.47 output = 2.00 -cached_input = 0.141 +cache_read = 0.141 [limit] context = 131_072 diff --git a/providers/vercel/models/deepseek/deepseek-v3.2-exp.toml b/providers/vercel/models/deepseek/deepseek-v3.2-exp.toml index cccb3ccf8..9527b74b0 100644 --- a/providers/vercel/models/deepseek/deepseek-v3.2-exp.toml +++ b/providers/vercel/models/deepseek/deepseek-v3.2-exp.toml @@ -12,7 +12,7 @@ open_weights = false [cost] input = 0.27 output = 0.4 -cached_input = 0.27 +cache_read = 0.27 [limit] context = 163_840 diff --git a/providers/vercel/models/zai/glm-4.6.toml b/providers/vercel/models/zai/glm-4.6.toml index 82a689774..d43143488 100644 --- a/providers/vercel/models/zai/glm-4.6.toml +++ b/providers/vercel/models/zai/glm-4.6.toml @@ -14,7 +14,7 @@ interleaved = true [cost] input = 0.45 output = 1.8 -cached_input = 0.45 +cache_read = 0.45 [limit] context = 200_000 From 5ce16fb0fd7ac2d9225ff1d72b8bd42c88f27aad Mon Sep 17 00:00:00 2001 From: RioPlay Date: Sat, 2 May 2026 02:07:57 -0500 Subject: [PATCH 3/9] 
style(deepinfra): add trailing newlines, deepinfra.com comments, fix trailing whitespace --- providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml | 2 +- providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml | 3 ++- providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml | 1 + providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml | 2 ++ .../deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml | 2 +- .../deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml | 2 +- providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3-14B.toml | 2 +- .../deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml | 2 +- .../deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3-32B.toml | 2 +- .../models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml | 3 ++- .../deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml | 3 ++- .../deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml | 2 +- .../deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3.5-27B.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3.5-2B.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml | 2 ++ providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml | 2 ++ providers/deepinfra/models/Qwen/Qwen3.5-4B.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3.5-9B.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3.6-27B.toml | 2 +- providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml | 3 +++ providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml | 2 +- providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml | 2 +- .../deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml | 2 ++ 
providers/deepinfra/models/anthropic/claude-4-opus.toml | 2 ++ .../deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml | 2 +- providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml | 2 ++ .../models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml | 2 +- providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml | 2 +- .../deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml | 2 +- providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml | 2 +- providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml | 2 ++ providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml | 2 +- providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml | 2 +- providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml | 2 ++ providers/deepinfra/models/google/gemma-3-12b-it.toml | 2 +- providers/deepinfra/models/google/gemma-3-27b-it.toml | 2 +- providers/deepinfra/models/google/gemma-3-4b-it.toml | 2 +- providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml | 2 +- providers/deepinfra/models/google/gemma-4-31B-it.toml | 2 +- .../models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml | 2 ++ .../deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml | 2 ++ .../models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml | 2 ++ .../deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml | 2 ++ .../models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml | 2 +- .../models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml | 2 ++ .../meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml | 2 ++ .../models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml | 2 ++ providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml | 2 +- providers/deepinfra/models/microsoft/phi-4.toml | 2 +- .../deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml | 2 +- .../models/mistralai/Mistral-Small-24B-Instruct-2501.toml | 2 +- .../models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml | 2 +- .../deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml | 2 +- 
providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml | 2 ++ providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml | 2 ++ providers/deepinfra/models/moonshotai/Kimi-K2.5.toml | 3 ++- providers/deepinfra/models/moonshotai/Kimi-K2.6.toml | 2 ++ .../models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml | 2 +- .../models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml | 2 +- .../deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml | 2 +- providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml | 2 +- .../models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml | 2 +- providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml | 2 +- providers/deepinfra/models/openai/gpt-oss-120b.toml | 1 + providers/deepinfra/models/openai/gpt-oss-20b.toml | 1 + providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml | 2 +- providers/deepinfra/models/zai-org/GLM-4.5.toml | 2 ++ providers/deepinfra/models/zai-org/GLM-4.6.toml | 2 +- providers/deepinfra/models/zai-org/GLM-4.6V.toml | 2 +- providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml | 1 + providers/deepinfra/models/zai-org/GLM-4.7.toml | 1 + providers/deepinfra/models/zai-org/GLM-5.1.toml | 3 ++- providers/deepinfra/models/zai-org/GLM-5.toml | 3 ++- 80 files changed, 107 insertions(+), 55 deletions(-) diff --git a/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml b/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml index f66b57a4c..d8e56482b 100644 --- a/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml +++ b/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml @@ -18,4 +18,4 @@ output = 2048 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml index 21463f3e6..c0e928ab7 100644 --- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml +++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml @@ -1,3 +1,4 @@ +# 
https://deepinfra.com/MiniMaxAI/MiniMax-M2.1 name = "MiniMax M2.1" release_date = "2025-12-23" last_updated = "2025-12-23" @@ -16,7 +17,7 @@ cache_read = 0.14 [limit] context = 196_608 output = 196_608 - + [modalities] input = ["text"] output = ["text"] diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml index 0a3d9dcf8..f40361980 100644 --- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml +++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml @@ -24,3 +24,4 @@ output = ["text"] [interleaved] field = "reasoning_content" + diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml index 4a5cca112..333efc4b4 100644 --- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml +++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/MiniMaxAI/MiniMax-M2 name = "MiniMax M2" family = "minimax" release_date = "2025-11-13" @@ -24,3 +25,4 @@ output = 32_768 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml index dccf1e193..18cad0a2a 100644 --- a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml +++ b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml @@ -20,4 +20,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml index 3bb7f2a06..5936ea888 100644 --- a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml +++ b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml @@ -20,4 +20,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at 
end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml index e459c46a1..d88fd6fc9 100644 --- a/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml +++ b/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml @@ -20,4 +20,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-14B.toml b/providers/deepinfra/models/Qwen/Qwen3-14B.toml index 021b0fb44..50529e7f4 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-14B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-14B.toml @@ -20,4 +20,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml index 63d303946..f89d4830a 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml @@ -20,4 +20,4 @@ output = 81_920 [modalities] input = ["text", "image", "video"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml index ce52431dc..4d616a932 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml @@ -21,4 +21,4 @@ output = 81_920 [modalities] input = ["text", "image", "video"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml index 60209c9fd..362cc6cfe 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml +++ 
b/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml @@ -20,4 +20,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-32B.toml b/providers/deepinfra/models/Qwen/Qwen3-32B.toml index 8fb7ef139..d5a5040a0 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-32B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-32B.toml @@ -20,4 +20,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml index 1154bb779..8b1303f14 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo name = "Qwen3 Coder 480B A35B Instruct Turbo" family = "qwen" release_date = "2025-07-23" @@ -20,4 +21,4 @@ output = 66_536 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml index 08c0ed4ab..c556f98b8 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/Qwen/Qwen3-Coder-480B-A35B-Instruct name = "Qwen3 Coder 480B A35B Instruct" family = "qwen" release_date = "2025-07-23" @@ -19,4 +20,4 @@ output = 66_536 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml index 
abcf7d075..9bd4fa4f7 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml @@ -20,4 +20,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml index 67bed3f00..a1f5c5c71 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml @@ -21,4 +21,4 @@ output = 81_920 [modalities] input = ["text", "image", "video"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml index 0954ffa6a..e9277d8b3 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml @@ -20,4 +20,4 @@ output = 81_920 [modalities] input = ["text", "image", "video"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml index 4940cd7b6..afb332615 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml @@ -19,4 +19,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml index 4f3020eb7..6a8de84a4 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml @@ -20,4 +20,4 @@ output = 81_920 [modalities] input = ["text", "image", "video"] -output = 
["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml index 23b94b3de..7b0cae035 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml @@ -20,4 +20,4 @@ output = 81_920 [modalities] input = ["text", "image", "video"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml index 01cbc6d9a..8954a1c1a 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml @@ -19,4 +19,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml index e342e5b18..ee19fe8c4 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/Qwen/Qwen3.5-35B-A3B name = "Qwen 3.5 35B A3B" family = "qwen" release_date = "2026-02-01" @@ -20,3 +21,4 @@ output = 81_920 [modalities] input = ["text","image","video"] output = ["text"] + diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml index e961adf51..e5a826187 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/Qwen/Qwen3.5-397B-A17B name = "Qwen 3.5 397B A17B" family = "qwen" release_date = "2026-02-01" @@ -21,3 +22,4 @@ output = 81_920 [modalities] input = ["text","image","video"] output = ["text"] + diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml index 
7ed305694..831beb24f 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml @@ -19,4 +19,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml index 2320ecf02..4b5999d86 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml @@ -19,4 +19,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.6-27B.toml b/providers/deepinfra/models/Qwen/Qwen3.6-27B.toml index dab29ef0a..5540d5cd5 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.6-27B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.6-27B.toml @@ -20,4 +20,4 @@ output = 81_920 [modalities] input = ["text", "image", "video"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml index 5c1c94a60..7ffe7d817 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/Qwen/Qwen3.6-35B-A3B name = "Qwen3.6 35B A3B" family = "qwen" release_date = "2026-04-01" @@ -19,3 +20,4 @@ output = 81_920 [modalities] input = ["text", "image", "video"] output = ["text"] + diff --git a/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml b/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml index 266f6ea62..71ee291c8 100644 --- a/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml +++ b/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml @@ -19,4 +19,4 @@ output = 2048 [modalities] input = ["text"] -output = ["text"] \
No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml b/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml index cab06dcc8..791da9937 100644 --- a/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml +++ b/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml @@ -19,4 +19,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml b/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml index 28d93a4fb..605a835e0 100644 --- a/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml +++ b/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/anthropic/claude-3-7-sonnet-latest name = "Claude Sonnet 3.7 (Latest)" family = "claude-sonnet" release_date = "2025-03-13" @@ -21,3 +22,4 @@ output = 64_000 [modalities] input = ["text", "image"] output = ["text"] + diff --git a/providers/deepinfra/models/anthropic/claude-4-opus.toml b/providers/deepinfra/models/anthropic/claude-4-opus.toml index 51f441d78..007d325c2 100644 --- a/providers/deepinfra/models/anthropic/claude-4-opus.toml +++ b/providers/deepinfra/models/anthropic/claude-4-opus.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/anthropic/claude-4-opus name = "Claude Opus 4" family = "claude-opus" release_date = "2025-06-12" @@ -20,3 +21,4 @@ output = 32_000 [modalities] input = ["text", "image"] output = ["text"] + diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml index cb2d1cae7..949934ae1 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml @@ -19,4 +19,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No 
newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml index 2733a6938..40cb5a1b1 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-R1-0528 name = "DeepSeek-R1-0528" release_date = "2025-05-28" last_updated = "2025-05-28" @@ -23,3 +24,4 @@ output = 64_000 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml index 32b40c39a..13616d0bf 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml @@ -19,4 +19,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml index a290691b9..5e659a00e 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml @@ -21,4 +21,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml index 3f25ea8e7..12a439bbc 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml @@ -21,4 +21,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git 
a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml index 81118a4ff..e64e5eb3a 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml @@ -21,4 +21,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml index 74d5e7d60..954cef7a8 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V3.2 name = "DeepSeek-V3.2" release_date = "2025-12-02" last_updated = "2025-12-02" @@ -23,3 +24,4 @@ output = 64_000 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml index cad03c604..8acef8588 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml @@ -20,4 +20,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml index 4632e23cf..e1f00596c 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml @@ -20,4 +20,4 @@ output = 131_072 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml index e867b3a48..59057d4f0 100644 --- 
a/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V4-Pro attachment = false [extends] @@ -6,3 +7,4 @@ from = "deepseek/deepseek-v4-pro" [limit] context = 65_536 output = 65_536 + diff --git a/providers/deepinfra/models/google/gemma-3-12b-it.toml b/providers/deepinfra/models/google/gemma-3-12b-it.toml index d30ec29f7..4e54d7491 100644 --- a/providers/deepinfra/models/google/gemma-3-12b-it.toml +++ b/providers/deepinfra/models/google/gemma-3-12b-it.toml @@ -20,4 +20,4 @@ output = 8192 [modalities] input = ["text", "image"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-3-27b-it.toml b/providers/deepinfra/models/google/gemma-3-27b-it.toml index 4c4b9d3d0..394a82b5d 100644 --- a/providers/deepinfra/models/google/gemma-3-27b-it.toml +++ b/providers/deepinfra/models/google/gemma-3-27b-it.toml @@ -20,4 +20,4 @@ output = 8192 [modalities] input = ["text", "image"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-3-4b-it.toml b/providers/deepinfra/models/google/gemma-3-4b-it.toml index 6c7b160bf..6c57358d5 100644 --- a/providers/deepinfra/models/google/gemma-3-4b-it.toml +++ b/providers/deepinfra/models/google/gemma-3-4b-it.toml @@ -20,4 +20,4 @@ output = 8192 [modalities] input = ["text", "image"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml b/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml index a96231d05..71f2ae483 100644 --- a/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml +++ b/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml @@ -20,4 +20,4 @@ output = 16_384 [modalities] input = ["text", "image", "video"] -output = ["text"] \ No newline at end of file +output = ["text"] 
diff --git a/providers/deepinfra/models/google/gemma-4-31B-it.toml b/providers/deepinfra/models/google/gemma-4-31B-it.toml index 71180fef5..9c53c1d46 100644 --- a/providers/deepinfra/models/google/gemma-4-31B-it.toml +++ b/providers/deepinfra/models/google/gemma-4-31B-it.toml @@ -20,4 +20,4 @@ output = 16_384 [modalities] input = ["text", "image", "video"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml index 79e674844..b26fd8538 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-3.1-70B-Instruct-Turbo name = "Llama 3.1 70B Turbo" family = "llama" release_date = "2024-07-23" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml index 2edbcb221..9d7a096b1 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-3.1-70B-Instruct name = "Llama 3.1 70B" family = "llama" release_date = "2024-07-23" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml index 3646a2d47..a883556e7 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-3.1-8B-Instruct-Turbo name = 
"Llama 3.1 8B Turbo" family = "llama" release_date = "2024-07-23" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml index ec3539991..b2e9f62ee 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-3.1-8B-Instruct name = "Llama 3.1 8B" family = "llama" release_date = "2024-07-23" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml index 14039f65c..e5ec1eaf7 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml @@ -19,4 +19,4 @@ output = 4096 [modalities] input = ["text", "image"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml index df433558b..6903f0a56 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-3.3-70B-Instruct-Turbo name = "Llama 3.3 70B Turbo" family = "llama" release_date = "2024-12-06" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml b/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml index 1a2b88bbc..05e0d1afd 100644 --- 
a/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml +++ b/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 name = "Llama 4 Maverick 17B FP8" family = "llama" release_date = "2025-04-05" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text", "image"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml index 8f7bab10f..efcb012ef 100644 --- a/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml +++ b/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-4-Scout-17B-16E-Instruct name = "Llama 4 Scout 17B" family = "llama" release_date = "2025-04-05" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text", "image"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml b/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml index c2ff559fb..4f00ea1f7 100644 --- a/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml +++ b/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml @@ -19,4 +19,4 @@ output = 4096 [modalities] input = ["text", "image"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/microsoft/phi-4.toml b/providers/deepinfra/models/microsoft/phi-4.toml index 4413a7167..ea25cd4e6 100644 --- a/providers/deepinfra/models/microsoft/phi-4.toml +++ b/providers/deepinfra/models/microsoft/phi-4.toml @@ -20,4 +20,4 @@ output = 4096 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml 
b/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml index 55c02dcdd..e865afb64 100644 --- a/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml +++ b/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml @@ -20,4 +20,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml b/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml index 141e53388..b2dd4dd4b 100644 --- a/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml +++ b/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml @@ -20,4 +20,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml b/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml index d8c11caf1..4107b6d91 100644 --- a/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml +++ b/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml @@ -20,4 +20,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml b/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml index 180430bf1..6eadca87c 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml @@ -21,4 +21,4 @@ output = 262_144 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml b/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml index 285310a44..11a976885 100644 --- 
a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/moonshotai/Kimi-K2-Instruct name = "Kimi K2" family = "kimi" release_date = "2025-07-11" @@ -20,3 +21,4 @@ output = 32_768 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml b/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml index 4a3436a72..2a56b9671 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/moonshotai/Kimi-K2-Thinking name = "Kimi K2 Thinking" family = "kimi-thinking" release_date = "2025-11-06" @@ -24,3 +25,4 @@ output = 32_768 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml b/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml index c1d110370..7663f4983 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/moonshotai/Kimi-K2.5 name = "Kimi K2.5" family = "kimi" release_date = "2026-01-27" @@ -24,4 +25,4 @@ input = ["text", "image", "video"] output = ["text"] [interleaved] -field = "reasoning_content" \ No newline at end of file +field = "reasoning_content" diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2.6.toml b/providers/deepinfra/models/moonshotai/Kimi-K2.6.toml index ebcf7f7c9..16e6bdfc7 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2.6.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2.6.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/moonshotai/Kimi-K2.6 name = "Kimi K2.6" family = "kimi" release_date = "2026-04-21" @@ -26,3 +27,4 @@ output = 16_384 [modalities] input = ["text", "image", "video"] output = ["text"] + diff --git 
a/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml b/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml index 52f52fc2a..c1fa2f2fc 100644 --- a/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml +++ b/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml @@ -19,4 +19,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml index 1b9ee873b..9313244eb 100644 --- a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml +++ b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml @@ -19,4 +19,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml index a74190003..3f2950494 100644 --- a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml +++ b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml @@ -18,4 +18,4 @@ output = 8192 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml index 539125159..0c0448b6e 100644 --- a/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml +++ b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml @@ -19,4 +19,4 @@ output = 81_920 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml 
b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml index 5224a123b..c7378fdc9 100644 --- a/providers/deepinfra/models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml +++ b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml @@ -21,4 +21,4 @@ output = 81_920 [modalities] input = ["text", "image", "video", "audio"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml b/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml index 095afb11d..2cc256b55 100644 --- a/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml +++ b/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml @@ -19,4 +19,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/openai/gpt-oss-120b.toml b/providers/deepinfra/models/openai/gpt-oss-120b.toml index 0889e79c2..5243442c0 100644 --- a/providers/deepinfra/models/openai/gpt-oss-120b.toml +++ b/providers/deepinfra/models/openai/gpt-oss-120b.toml @@ -22,3 +22,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/openai/gpt-oss-20b.toml b/providers/deepinfra/models/openai/gpt-oss-20b.toml index 9342fee14..7a0c249c4 100644 --- a/providers/deepinfra/models/openai/gpt-oss-20b.toml +++ b/providers/deepinfra/models/openai/gpt-oss-20b.toml @@ -22,3 +22,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml b/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml index 80e947033..19b280c7b 100644 --- a/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml +++ b/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml @@ -23,4 +23,4 @@ output = 16_384 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git 
a/providers/deepinfra/models/zai-org/GLM-4.5.toml b/providers/deepinfra/models/zai-org/GLM-4.5.toml index ae5ae7ec1..0bff83456 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.5.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.5.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/zai-org/GLM-4.5 name = "GLM-4.5" family = "glm" release_date = "2025-07-28" @@ -24,3 +25,4 @@ output = 98_304 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/zai-org/GLM-4.6.toml b/providers/deepinfra/models/zai-org/GLM-4.6.toml index 63eb5b3f7..13d75372f 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.6.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.6.toml @@ -24,4 +24,4 @@ output = 131_072 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-4.6V.toml b/providers/deepinfra/models/zai-org/GLM-4.6V.toml index b3f6ce288..59db6966e 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.6V.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.6V.toml @@ -23,4 +23,4 @@ output = 131_072 [modalities] input = ["text", "image"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml b/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml index dbb42a2d8..784459f81 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml @@ -25,3 +25,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/zai-org/GLM-4.7.toml b/providers/deepinfra/models/zai-org/GLM-4.7.toml index 47dc4f9af..fa985538c 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.7.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.7.toml @@ -26,3 +26,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/zai-org/GLM-5.1.toml 
b/providers/deepinfra/models/zai-org/GLM-5.1.toml index 1b901d25e..286d0c14a 100644 --- a/providers/deepinfra/models/zai-org/GLM-5.1.toml +++ b/providers/deepinfra/models/zai-org/GLM-5.1.toml @@ -20,10 +20,11 @@ output = 4.4 cache_read = 0.26 [limit] -context = 202_752 +context = 202_752 # https://deepinfra.com/docs/advanced/max_tokens_limit output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/zai-org/GLM-5.toml b/providers/deepinfra/models/zai-org/GLM-5.toml index de1007aa8..334829a39 100644 --- a/providers/deepinfra/models/zai-org/GLM-5.toml +++ b/providers/deepinfra/models/zai-org/GLM-5.toml @@ -19,10 +19,11 @@ output = 2.08 cache_read = 0.12 [limit] -context = 202_752 +context = 202_752 # https://deepinfra.com/docs/advanced/max_tokens_limit output = 16_384 [modalities] input = ["text"] output = ["text"] + From d8a5369f50b9838f3ad5e113a9ac66b73a01e712 Mon Sep 17 00:00:00 2001 From: RioPlay Date: Sat, 2 May 2026 02:08:04 -0500 Subject: [PATCH 4/9] fix(deepinfra): correct pricing for GLM-5.1 and add missing cache_read for GLM-4.7-Flash --- providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml | 1 + providers/deepinfra/models/zai-org/GLM-5.1.toml | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml b/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml index 784459f81..bbc1835a1 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml @@ -16,6 +16,7 @@ field = "reasoning_content" [cost] input = 0.06 output = 0.40 +cache_read = 0.01 [limit] context = 202_752 diff --git a/providers/deepinfra/models/zai-org/GLM-5.1.toml b/providers/deepinfra/models/zai-org/GLM-5.1.toml index 286d0c14a..3f0e7d742 100644 --- a/providers/deepinfra/models/zai-org/GLM-5.1.toml +++ b/providers/deepinfra/models/zai-org/GLM-5.1.toml @@ -15,9 +15,9 @@ structured_output = true field = 
"reasoning_content" [cost] -input = 1.4 -output = 4.4 -cache_read = 0.26 +input = 1.05 +output = 3.50 +cache_read = 0.205 [limit] context = 202_752 From b3ffde6261f7145cf0032c1208bf32106c49db8f Mon Sep 17 00:00:00 2001 From: RioPlay Date: Sat, 2 May 2026 02:08:08 -0500 Subject: [PATCH 5/9] =?UTF-8?q?fix(deepinfra):=20correct=20pricing=20for?= =?UTF-8?q?=20GLM-4.7=20(input=200.43=E2=86=920.40)=20and=20gpt-oss-120b?= =?UTF-8?q?=20(0.05=E2=86=920.039,=200.24=E2=86=920.19)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- providers/deepinfra/models/openai/gpt-oss-120b.toml | 4 ++-- providers/deepinfra/models/zai-org/GLM-4.7.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/providers/deepinfra/models/openai/gpt-oss-120b.toml b/providers/deepinfra/models/openai/gpt-oss-120b.toml index 5243442c0..d548bfcfa 100644 --- a/providers/deepinfra/models/openai/gpt-oss-120b.toml +++ b/providers/deepinfra/models/openai/gpt-oss-120b.toml @@ -11,8 +11,8 @@ tool_call = true open_weights = true [cost] -input = 0.05 -output = 0.24 +input = 0.039 +output = 0.19 [limit] context = 131_072 diff --git a/providers/deepinfra/models/zai-org/GLM-4.7.toml b/providers/deepinfra/models/zai-org/GLM-4.7.toml index fa985538c..cf984ad11 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.7.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.7.toml @@ -14,7 +14,7 @@ open_weights = true field = "reasoning_content" [cost] -input = 0.43 +input = 0.40 output = 1.75 cache_read = 0.08 From fdf26decfad55d54ddd263e45be952ee35fdeb72 Mon Sep 17 00:00:00 2001 From: RioPlay Date: Sat, 2 May 2026 02:08:13 -0500 Subject: [PATCH 6/9] fix(deepinfra): restore missing cache_read for MiniMax-M2.5 --- providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml index f40361980..fb5ba5865 100644 --- 
a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml +++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml @@ -13,6 +13,7 @@ open_weights = true [cost] input = 0.15 output = 1.15 +cache_read = 0.03 [limit] context = 204_800 From baaa68a2f63c7329738ee8ea1bc358d0621a733f Mon Sep 17 00:00:00 2001 From: RioPlay Date: Sat, 2 May 2026 02:16:11 -0500 Subject: [PATCH 7/9] fix(deepinfra): add missing tool_call field to MythoMax-L2-13b --- providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml b/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml index d8e56482b..0bb65efd7 100644 --- a/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml +++ b/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml @@ -6,6 +6,7 @@ last_updated = "2023-11-01" attachment = false reasoning = false temperature = true +tool_call = true open_weights = true [cost] From e7cff4108759023119cbc0f88c06291cf2629f08 Mon Sep 17 00:00:00 2001 From: RioPlay Date: Sat, 2 May 2026 02:18:35 -0500 Subject: [PATCH 8/9] fix(deepinfra): add missing tool_call to 3 models --- .../deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml | 1 + .../models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml | 1 + .../deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml | 1 + 3 files changed, 3 insertions(+) diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml index 949934ae1..07dbecc67 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml @@ -6,6 +6,7 @@ last_updated = "2025-05-28" attachment = false reasoning = true temperature = true +tool_call = true knowledge = "2024-07" open_weights = false diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml 
b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml index 13616d0bf..fc563d534 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml @@ -6,6 +6,7 @@ last_updated = "2025-02-01" attachment = false reasoning = true temperature = true +tool_call = true knowledge = "2024-10" open_weights = true diff --git a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml index 3f2950494..38a0aab72 100644 --- a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml +++ b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml @@ -6,6 +6,7 @@ last_updated = "2026-02-01" attachment = false reasoning = true temperature = true +tool_call = true open_weights = true [cost] From 14a53aefef01b982dbec348ae59b8d5cac756fe7 Mon Sep 17 00:00:00 2001 From: RioPlay Date: Sat, 2 May 2026 02:24:19 -0500 Subject: [PATCH 9/9] revert: restore vercel provider files to original state --- providers/vercel/models/deepseek/deepseek-v3.2-exp.toml | 2 +- providers/vercel/models/zai/glm-4.6.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/providers/vercel/models/deepseek/deepseek-v3.2-exp.toml b/providers/vercel/models/deepseek/deepseek-v3.2-exp.toml index 9527b74b0..cccb3ccf8 100644 --- a/providers/vercel/models/deepseek/deepseek-v3.2-exp.toml +++ b/providers/vercel/models/deepseek/deepseek-v3.2-exp.toml @@ -12,7 +12,7 @@ open_weights = false [cost] input = 0.27 output = 0.4 -cache_read = 0.27 +cached_input = 0.27 [limit] context = 163_840 diff --git a/providers/vercel/models/zai/glm-4.6.toml b/providers/vercel/models/zai/glm-4.6.toml index d43143488..82a689774 100644 --- a/providers/vercel/models/zai/glm-4.6.toml +++ b/providers/vercel/models/zai/glm-4.6.toml @@ -14,7 +14,7 @@ interleaved = true [cost] input = 0.45 output = 1.8 -cache_read = 0.45 
+cached_input = 0.45 [limit] context = 200_000