From 3aecc94c46efd5d9e096560284b571288dacdc08 Mon Sep 17 00:00:00 2001
From: Isaac Ng <5kahoisaac@gmail.com>
Date: Sat, 2 May 2026 02:12:31 +0800
Subject: [PATCH 1/4] chore(nvidia): sync endpoint model catalog

Update NVIDIA model TOMLs to match the live Build endpoint list by removing stale entries and adding missing ones.

This keeps the provider catalog aligned with the current free and partner endpoint inventory.
---
 .../dracarys-llama-3_1-70b-instruct.toml}     | 13 +++++-----
 providers/nvidia/models/baai/bge-m3.toml      | 21 +++++++++++++++
 .../black-forest-labs/flux_1-kontext-dev.toml | 20 ++++++++++++++
 .../black-forest-labs/flux_1-schnell.toml     | 23 ++++++++++++++++
 .../black-forest-labs/flux_2-klein-4b.toml    | 22 ++++++++++++++++
 .../seed-oss-36b-instruct.toml}               | 15 ++++++-----
 .../deepseek-coder-6.7b-instruct.toml         | 21 ---------------
 .../models/deepseek-ai/deepseek-r1-0528.toml  | 21 ---------------
 .../models/deepseek-ai/deepseek-r1.toml       | 21 ---------------
 .../models/deepseek-ai/deepseek-v3.1.toml     | 22 ----------------
 .../models/google/codegemma-1.1-7b.toml       | 21 ---------------
 .../nvidia/models/google/codegemma-7b.toml    | 21 ---------------
 .../nvidia/models/google/gemma-2-27b-it.toml  | 21 ---------------
 .../nvidia/models/google/gemma-3-12b-it.toml  | 21 ---------------
 .../nvidia/models/google/gemma-3-1b-it.toml   | 21 ---------------
 .../models/google/google-paligemma.toml       | 20 ++++++++++++++
 .../nvidia/models/meta/codellama-70b.toml     | 21 ---------------
 providers/nvidia/models/meta/esm2-650m.toml   | 20 ++++++++++++++
 providers/nvidia/models/meta/esmfold.toml     | 20 ++++++++++++++
 .../models/meta/llama-3.1-405b-instruct.toml  | 21 ---------------
 .../models/meta/llama-3.1-8b-instruct.toml    | 22 ++++++++++++++++
 .../models/meta/llama-3.2-3b-instruct.toml    | 22 ++++++++++++++++
 .../meta/llama-3.2-90b-vision-instruct.toml   | 22 ++++++++++++++++
 .../meta/llama-4-scout-17b-16e-instruct.toml  | 22 ----------------
 .../nvidia/models/meta/llama-guard-4-12b.toml | 21 +++++++++++++++
 .../microsoft/phi-3-medium-128k-instruct.toml | 22 ----------------
 .../microsoft/phi-3-medium-4k-instruct.toml   | 22 ----------------
 .../microsoft/phi-3-small-128k-instruct.toml  | 22 ----------------
 .../microsoft/phi-3-small-8k-instruct.toml    | 22 ----------------
 .../microsoft/phi-3-vision-128k-instruct.toml | 21 ---------------
 .../microsoft/phi-3.5-moe-instruct.toml       | 21 ---------------
 .../microsoft/phi-3.5-vision-instruct.toml    | 21 ---------------
 .../microsoft/phi-4-multimodal-instruct.toml  | 21 +++++++++++++++
 .../codestral-22b-instruct-v0.1.toml          | 21 ---------------
 .../mistralai/magistral-small-2506.toml       | 21 +++++++++++++++
 .../mistralai/mamba-codestral-7b-v0.1.toml    | 21 ---------------
 .../mistral-7b-instruct-v03.toml}             | 14 +++++-----
 .../mistralai/mistral-large-2-instruct.toml   | 21 ---------------
 .../mistralai/mistral-medium-3-instruct.toml  | 22 ++++++++++++++++
 .../mistralai/mistral-medium-3.5-128b.toml    |  8 ------
 .../models/mistralai/mistral-nemotron.toml    | 20 ++++++++++++++
 .../mistral-small-3.1-24b-instruct-2503.toml  | 21 ---------------
 .../mistralai/mistral-small-4-119b-2603.toml  | 20 ++++++++++++++
 .../mixtral-8x22b-instruct.toml}              | 15 +++++------
 .../mistralai/mixtral-8x7b-instruct.toml      | 20 ++++++++++++++
 .../nvidia/models/moonshotai/kimi-k2.5.toml   | 26 -------------------
 .../nvidia/active-speaker-detection.toml      | 20 ++++++++++++++
 providers/nvidia/models/nvidia/bevformer.toml | 20 ++++++++++++++
 .../models/nvidia/cosmos-nemotron-34b.toml    | 22 ----------------
 .../models/nvidia/cosmos-predict1-5b.toml     | 20 ++++++++++++++
 .../models/nvidia/cosmos-transfer1-7b.toml    | 20 ++++++++++++++
 .../models/nvidia/cosmos-transfer2_5-2b.toml  | 20 ++++++++++++++
 .../nvidia/models/nvidia/gliner-pii.toml      | 20 ++++++++++++++
 .../llama-3.1-nemotron-70b-instruct.toml      | 21 ---------------
 .../llama-3.1-nemotron-ultra-253b-v1.toml     | 22 ----------------
 ...llama-3_1-nemotron-safety-guard-8b-v3.toml | 20 ++++++++++++++
 ...lama-3_2-nemoretriever-300m-embed-v1.toml} | 12 ++++-----
 .../nvidia/llama-nemotron-embed-vl-1b-v2.toml | 20 ++++++++++++++
 .../llama-nemotron-rerank-vl-1b-v2.toml       | 20 ++++++++++++++
 .../models/nvidia/llama3-chatqa-1.5-70b.toml  | 21 ---------------
 .../models/nvidia/magpie-tts-zeroshot.toml    | 20 ++++++++++++++
 .../models/nvidia/nemoretriever-ocr-v1.toml   | 22 ----------------
 .../nvidia/nemotron-3-content-safety.toml     | 20 ++++++++++++++
 .../nvidia/nemotron-4-340b-instruct.toml      | 21 ---------------
 .../nemotron-content-safety-reasoning-4b.toml | 20 ++++++++++++++
 .../nvidia/nemotron-mini-4b-instruct.toml     | 20 ++++++++++++++
 .../nemotron-nano-12b-v2-vl.toml}             | 19 +++++++-------
 .../models/nvidia/nemotron-voicechat.toml     | 20 ++++++++++++++
 .../nvidia/models/nvidia/nv-embed-v1.toml     | 20 ++++++++++++++
 .../models/nvidia/nv-embedcode-7b-v1.toml     | 20 ++++++++++++++
 .../models/nvidia/parakeet-tdt-0.6b-v2.toml   | 22 ----------------
 .../models/nvidia/rerank-qa-mistral-4b.toml   | 20 ++++++++++++++
 .../riva-translate-4b-instruct-v1_1.toml      | 20 ++++++++++++++
 .../nvidia/models/nvidia/sparsedrive.toml     | 20 ++++++++++++++
 .../nvidia/models/nvidia/streampetr.toml      | 20 ++++++++++++++
 .../nvidia/models/nvidia/studiovoice.toml     | 20 ++++++++++++++
 .../nvidia/synthetic-video-detector.toml      | 20 ++++++++++++++
 providers/nvidia/models/nvidia/usdcode.toml   | 20 ++++++++++++++
 .../nvidia/models/nvidia/usdvalidate.toml     | 20 ++++++++++++++
 .../glm5.toml => openai/gpt-oss-20b.toml}     | 15 +++++------
 .../qwen/qwen2.5-coder-7b-instruct.toml       | 21 ---------------
 .../nvidia/models/qwen/qwen3-235b-a22b.toml   | 22 ----------------
 .../nvidia/models/qwen/qwen3.5-122b-a10b.toml | 22 ++++++++++++++++
 providers/nvidia/models/qwen/qwq-32b.toml     | 21 ---------------
 .../nvidia/models/sarvamai/sarvam-m.toml      | 20 ++++++++++++++
 .../models/upstage/solar-10_7b-instruct.toml  | 20 ++++++++++++++
 86 files changed, 947 insertions(+), 793 deletions(-)
 rename providers/nvidia/models/{minimaxai/minimax-m2.1.toml => abacusai/dracarys-llama-3_1-70b-instruct.toml} (52%)
 create mode 100644 providers/nvidia/models/baai/bge-m3.toml
 create mode 100644 providers/nvidia/models/black-forest-labs/flux_1-kontext-dev.toml
 create mode 100644 providers/nvidia/models/black-forest-labs/flux_1-schnell.toml
 create mode 100644 providers/nvidia/models/black-forest-labs/flux_2-klein-4b.toml
 rename providers/nvidia/models/{nvidia/llama-3.1-nemotron-51b-instruct.toml => bytedance/seed-oss-36b-instruct.toml} (50%)
 delete mode 100644 providers/nvidia/models/deepseek-ai/deepseek-coder-6.7b-instruct.toml
 delete mode 100644 providers/nvidia/models/deepseek-ai/deepseek-r1-0528.toml
 delete mode 100644 providers/nvidia/models/deepseek-ai/deepseek-r1.toml
 delete mode 100644 providers/nvidia/models/deepseek-ai/deepseek-v3.1.toml
 delete mode 100644 providers/nvidia/models/google/codegemma-1.1-7b.toml
 delete mode 100644 providers/nvidia/models/google/codegemma-7b.toml
 delete mode 100644 providers/nvidia/models/google/gemma-2-27b-it.toml
 delete mode 100644 providers/nvidia/models/google/gemma-3-12b-it.toml
 delete mode 100644 providers/nvidia/models/google/gemma-3-1b-it.toml
 create mode 100644 providers/nvidia/models/google/google-paligemma.toml
 delete mode 100644 providers/nvidia/models/meta/codellama-70b.toml
 create mode 100644 providers/nvidia/models/meta/esm2-650m.toml
 create mode 100644 providers/nvidia/models/meta/esmfold.toml
 delete mode 100644 providers/nvidia/models/meta/llama-3.1-405b-instruct.toml
 create mode 100644 providers/nvidia/models/meta/llama-3.1-8b-instruct.toml
 create mode 100644 providers/nvidia/models/meta/llama-3.2-3b-instruct.toml
 create mode 100644 providers/nvidia/models/meta/llama-3.2-90b-vision-instruct.toml
 delete mode 100644 providers/nvidia/models/meta/llama-4-scout-17b-16e-instruct.toml
 create mode 100644 providers/nvidia/models/meta/llama-guard-4-12b.toml
 delete mode 100644 providers/nvidia/models/microsoft/phi-3-medium-128k-instruct.toml
 delete mode 100644 providers/nvidia/models/microsoft/phi-3-medium-4k-instruct.toml
 delete mode 100644 providers/nvidia/models/microsoft/phi-3-small-128k-instruct.toml
 delete mode 100644 providers/nvidia/models/microsoft/phi-3-small-8k-instruct.toml
 delete mode 100644 providers/nvidia/models/microsoft/phi-3-vision-128k-instruct.toml
 delete mode 100644 providers/nvidia/models/microsoft/phi-3.5-moe-instruct.toml
 delete mode 100644 providers/nvidia/models/microsoft/phi-3.5-vision-instruct.toml
 create mode 100644 providers/nvidia/models/microsoft/phi-4-multimodal-instruct.toml
 delete mode 100644 providers/nvidia/models/mistralai/codestral-22b-instruct-v0.1.toml
 create mode 100644 providers/nvidia/models/mistralai/magistral-small-2506.toml
 delete mode 100644 providers/nvidia/models/mistralai/mamba-codestral-7b-v0.1.toml
 rename providers/nvidia/models/{meta/llama3-70b-instruct.toml => mistralai/mistral-7b-instruct-v03.toml} (55%)
 delete mode 100644 providers/nvidia/models/mistralai/mistral-large-2-instruct.toml
 create mode 100644 providers/nvidia/models/mistralai/mistral-medium-3-instruct.toml
 delete mode 100644 providers/nvidia/models/mistralai/mistral-medium-3.5-128b.toml
 create mode 100644 providers/nvidia/models/mistralai/mistral-nemotron.toml
 delete mode 100644 providers/nvidia/models/mistralai/mistral-small-3.1-24b-instruct-2503.toml
 create mode 100644 providers/nvidia/models/mistralai/mistral-small-4-119b-2603.toml
 rename providers/nvidia/models/{meta/llama3-8b-instruct.toml => mistralai/mixtral-8x22b-instruct.toml} (65%)
 create mode 100644 providers/nvidia/models/mistralai/mixtral-8x7b-instruct.toml
 delete mode 100644 providers/nvidia/models/moonshotai/kimi-k2.5.toml
 create mode 100644 providers/nvidia/models/nvidia/active-speaker-detection.toml
 create mode 100644 providers/nvidia/models/nvidia/bevformer.toml
 delete mode 100644 providers/nvidia/models/nvidia/cosmos-nemotron-34b.toml
 create mode 100644 providers/nvidia/models/nvidia/cosmos-predict1-5b.toml
 create mode 100644 providers/nvidia/models/nvidia/cosmos-transfer1-7b.toml
 create mode 100644 providers/nvidia/models/nvidia/cosmos-transfer2_5-2b.toml
 create mode 100644 providers/nvidia/models/nvidia/gliner-pii.toml
 delete mode 100644 providers/nvidia/models/nvidia/llama-3.1-nemotron-70b-instruct.toml
 delete mode 100644 providers/nvidia/models/nvidia/llama-3.1-nemotron-ultra-253b-v1.toml
 create mode 100644 providers/nvidia/models/nvidia/llama-3_1-nemotron-safety-guard-8b-v3.toml
 rename providers/nvidia/models/nvidia/{llama-embed-nemotron-8b.toml => llama-3_2-nemoretriever-300m-embed-v1.toml} (52%)
 create mode 100644 providers/nvidia/models/nvidia/llama-nemotron-embed-vl-1b-v2.toml
 create mode 100644 providers/nvidia/models/nvidia/llama-nemotron-rerank-vl-1b-v2.toml
 delete mode 100644 providers/nvidia/models/nvidia/llama3-chatqa-1.5-70b.toml
 create mode 100644 providers/nvidia/models/nvidia/magpie-tts-zeroshot.toml
 delete mode 100644 providers/nvidia/models/nvidia/nemoretriever-ocr-v1.toml
 create mode 100644 providers/nvidia/models/nvidia/nemotron-3-content-safety.toml
 delete mode 100644 providers/nvidia/models/nvidia/nemotron-4-340b-instruct.toml
 create mode 100644 providers/nvidia/models/nvidia/nemotron-content-safety-reasoning-4b.toml
 create mode 100644 providers/nvidia/models/nvidia/nemotron-mini-4b-instruct.toml
 rename providers/nvidia/models/{mistralai/ministral-14b-instruct-2512.toml => nvidia/nemotron-nano-12b-v2-vl.toml} (51%)
 create mode 100644 providers/nvidia/models/nvidia/nemotron-voicechat.toml
 create mode 100644 providers/nvidia/models/nvidia/nv-embed-v1.toml
 create mode 100644 providers/nvidia/models/nvidia/nv-embedcode-7b-v1.toml
 delete mode 100644 providers/nvidia/models/nvidia/parakeet-tdt-0.6b-v2.toml
 create mode 100644 providers/nvidia/models/nvidia/rerank-qa-mistral-4b.toml
 create mode 100644 providers/nvidia/models/nvidia/riva-translate-4b-instruct-v1_1.toml
 create mode 100644 providers/nvidia/models/nvidia/sparsedrive.toml
 create mode 100644 providers/nvidia/models/nvidia/streampetr.toml
 create mode 100644 providers/nvidia/models/nvidia/studiovoice.toml
 create mode 100644 providers/nvidia/models/nvidia/synthetic-video-detector.toml
 create mode 100644 providers/nvidia/models/nvidia/usdcode.toml
 create mode 100644 providers/nvidia/models/nvidia/usdvalidate.toml
 rename providers/nvidia/models/{z-ai/glm5.toml => openai/gpt-oss-20b.toml} (56%)
 delete mode 100644 providers/nvidia/models/qwen/qwen2.5-coder-7b-instruct.toml
 delete mode 100644 providers/nvidia/models/qwen/qwen3-235b-a22b.toml
 create mode 100644 providers/nvidia/models/qwen/qwen3.5-122b-a10b.toml
 delete mode 100644 providers/nvidia/models/qwen/qwq-32b.toml
 create mode 100644 providers/nvidia/models/sarvamai/sarvam-m.toml
 create mode 100644 providers/nvidia/models/upstage/solar-10_7b-instruct.toml

diff --git a/providers/nvidia/models/minimaxai/minimax-m2.1.toml b/providers/nvidia/models/abacusai/dracarys-llama-3_1-70b-instruct.toml
similarity index 52%
rename from providers/nvidia/models/minimaxai/minimax-m2.1.toml
rename to providers/nvidia/models/abacusai/dracarys-llama-3_1-70b-instruct.toml
index 9e9c0a249..e94026416 100644
--- a/providers/nvidia/models/minimaxai/minimax-m2.1.toml
+++ b/providers/nvidia/models/abacusai/dracarys-llama-3_1-70b-instruct.toml
@@ -1,9 +1,8 @@
-name = "MiniMax-M2.1"
-family = "minimax"
-release_date = "2025-12-23"
-last_updated = "2025-12-23"
+name = "dracarys-llama-3.1-70b-instruct"
+release_date = "2024-09-11"
+last_updated = "2025-05-22"
 attachment = false
-reasoning = true
+reasoning = false
 temperature = true
 tool_call = true
 open_weights = true
@@ -13,8 +12,8 @@ input = 0.0
 output = 0.0
 
 [limit]
-context = 204_800
-output = 131_072
+context = 128_000
+output = 8_192
 
 [modalities]
 input = ["text"]
diff --git a/providers/nvidia/models/baai/bge-m3.toml b/providers/nvidia/models/baai/bge-m3.toml
new file mode 100644
index 000000000..1b5b1af4d
--- /dev/null
+++ b/providers/nvidia/models/baai/bge-m3.toml
@@ -0,0 +1,21 @@
+name = "BGE M3"
+family = "bge"
+release_date = "2024-01-30"
+last_updated = "2026-04-30"
+attachment = false
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 8_192
+output = 1_024
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/black-forest-labs/flux_1-kontext-dev.toml b/providers/nvidia/models/black-forest-labs/flux_1-kontext-dev.toml
new file mode 100644
index 000000000..6b8c5b207
--- /dev/null
+++ b/providers/nvidia/models/black-forest-labs/flux_1-kontext-dev.toml
@@ -0,0 +1,20 @@
+name = "FLUX.1-Kontext-dev"
+release_date = "2025-08-12"
+last_updated = "2025-08-12"
+attachment = true
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 40_960
+output = 40_960
+
+[modalities]
+input = ["text", "image"]
+output = ["image"]
diff --git a/providers/nvidia/models/black-forest-labs/flux_1-schnell.toml b/providers/nvidia/models/black-forest-labs/flux_1-schnell.toml
new file mode 100644
index 000000000..69f3a8c6e
--- /dev/null
+++ b/providers/nvidia/models/black-forest-labs/flux_1-schnell.toml
@@ -0,0 +1,23 @@
+name = "FLUX.1-schnell"
+release_date = "2024-08-01"
+last_updated = "2026-02-04"
+attachment = false
+reasoning = false
+temperature = false
+knowledge = "2024-07"
+tool_call = false
+structured_output = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 77
+input = 77
+output = 0
+
+[modalities]
+input = ["text"]
+output = ["image"]
diff --git a/providers/nvidia/models/black-forest-labs/flux_2-klein-4b.toml b/providers/nvidia/models/black-forest-labs/flux_2-klein-4b.toml
new file mode 100644
index 000000000..a4a946d96
--- /dev/null
+++ b/providers/nvidia/models/black-forest-labs/flux_2-klein-4b.toml
@@ -0,0 +1,22 @@
+name = "FLUX.2 Klein 4B"
+family = "flux"
+release_date = "2026-01-14"
+last_updated = "2026-01-31"
+attachment = false
+reasoning = false
+temperature = true
+knowledge = "2025-06"
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 40_960
+output = 40_960
+
+[modalities]
+input = ["image", "text"]
+output = ["image"]
diff --git a/providers/nvidia/models/nvidia/llama-3.1-nemotron-51b-instruct.toml b/providers/nvidia/models/bytedance/seed-oss-36b-instruct.toml
similarity index 50%
rename from providers/nvidia/models/nvidia/llama-3.1-nemotron-51b-instruct.toml
rename to providers/nvidia/models/bytedance/seed-oss-36b-instruct.toml
index 4a1cf4a75..18abad3db 100644
--- a/providers/nvidia/models/nvidia/llama-3.1-nemotron-51b-instruct.toml
+++ b/providers/nvidia/models/bytedance/seed-oss-36b-instruct.toml
@@ -1,20 +1,21 @@
-name = "Llama 3.1 Nemotron 51b Instruct"
+name = "ByteDance-Seed/Seed-OSS-36B-Instruct"
+family = "seed"
+release_date = "2025-09-04"
+last_updated = "2025-11-25"
 attachment = false
 reasoning = false
 temperature = true
 tool_call = true
 structured_output = true
-release_date = "2024-09-22"
-last_updated = "2024-09-22"
 open_weights = false
 
 [cost]
-input = 0.00
-output = 0.00
+input = 0.0
+output = 0.0
 
 [limit]
-context = 128000
-output = 4096
+context = 262_000
+output = 262_000
 
 [modalities]
 input = ["text"]
diff --git a/providers/nvidia/models/deepseek-ai/deepseek-coder-6.7b-instruct.toml b/providers/nvidia/models/deepseek-ai/deepseek-coder-6.7b-instruct.toml
deleted file mode 100644
index 4d653e528..000000000
--- a/providers/nvidia/models/deepseek-ai/deepseek-coder-6.7b-instruct.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Deepseek Coder 6.7b Instruct"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2023-10-29"
-last_updated = "2023-10-29"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/deepseek-ai/deepseek-r1-0528.toml b/providers/nvidia/models/deepseek-ai/deepseek-r1-0528.toml
deleted file mode 100644
index 912ad2316..000000000
--- a/providers/nvidia/models/deepseek-ai/deepseek-r1-0528.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Deepseek R1 0528"
-attachment = false
-reasoning = true
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2025-05-28"
-last_updated = "2025-05-28"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/deepseek-ai/deepseek-r1.toml b/providers/nvidia/models/deepseek-ai/deepseek-r1.toml
deleted file mode 100644
index f1f14ecec..000000000
--- a/providers/nvidia/models/deepseek-ai/deepseek-r1.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Deepseek R1"
-attachment = false
-reasoning = true
-temperature = true
-tool_call = false
-structured_output = false
-release_date = "2025-01-20"
-last_updated = "2025-01-20"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/deepseek-ai/deepseek-v3.1.toml b/providers/nvidia/models/deepseek-ai/deepseek-v3.1.toml
deleted file mode 100644
index be1c07528..000000000
--- a/providers/nvidia/models/deepseek-ai/deepseek-v3.1.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "DeepSeek V3.1"
-family = "deepseek"
-release_date = "2025-08-20"
-last_updated = "2025-08-26"
-attachment = false
-reasoning = true
-temperature = true
-knowledge = "2024-07"
-tool_call = true
-open_weights = false
-
-[cost]
-input = 0.0
-output = 0.0
-
-[limit]
-context = 128_000
-output = 8_192
-
-[modalities]
-input = ["text"]
-output = ["text"]
\ No newline at end of file
diff --git a/providers/nvidia/models/google/codegemma-1.1-7b.toml b/providers/nvidia/models/google/codegemma-1.1-7b.toml
deleted file mode 100644
index ca07d5eca..000000000
--- a/providers/nvidia/models/google/codegemma-1.1-7b.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Codegemma 1.1 7b"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = false
-structured_output = false
-release_date = "2024-04-30"
-last_updated = "2024-04-30"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/google/codegemma-7b.toml b/providers/nvidia/models/google/codegemma-7b.toml
deleted file mode 100644
index ac3519b0e..000000000
--- a/providers/nvidia/models/google/codegemma-7b.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Codegemma 7b"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = false
-structured_output = false
-release_date = "2024-03-21"
-last_updated = "2024-03-21"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/google/gemma-2-27b-it.toml b/providers/nvidia/models/google/gemma-2-27b-it.toml
deleted file mode 100644
index 6bcc69a82..000000000
--- a/providers/nvidia/models/google/gemma-2-27b-it.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Gemma 2 27b It"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2024-06-24"
-last_updated = "2024-06-24"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/google/gemma-3-12b-it.toml b/providers/nvidia/models/google/gemma-3-12b-it.toml
deleted file mode 100644
index 4313e2edc..000000000
--- a/providers/nvidia/models/google/gemma-3-12b-it.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Gemma 3 12b It"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2025-03-01"
-last_updated = "2025-03-01"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/google/gemma-3-1b-it.toml b/providers/nvidia/models/google/gemma-3-1b-it.toml
deleted file mode 100644
index d10b23aae..000000000
--- a/providers/nvidia/models/google/gemma-3-1b-it.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Gemma 3 1b It"
-attachment = true
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2025-03-10"
-last_updated = "2025-03-10"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text","image"]
-output = ["text"]
diff --git a/providers/nvidia/models/google/google-paligemma.toml b/providers/nvidia/models/google/google-paligemma.toml
new file mode 100644
index 000000000..1d9047d17
--- /dev/null
+++ b/providers/nvidia/models/google/google-paligemma.toml
@@ -0,0 +1,20 @@
+name = "paligemma"
+release_date = "2024-05-14"
+last_updated = "2024-08-26"
+attachment = true
+reasoning = false
+temperature = true
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
diff --git a/providers/nvidia/models/meta/codellama-70b.toml b/providers/nvidia/models/meta/codellama-70b.toml
deleted file mode 100644
index 6380e16c8..000000000
--- a/providers/nvidia/models/meta/codellama-70b.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Codellama 70b"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = false
-structured_output = false
-release_date = "2024-01-29"
-last_updated = "2024-01-29"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/meta/esm2-650m.toml b/providers/nvidia/models/meta/esm2-650m.toml
new file mode 100644
index 000000000..9d5efcc9b
--- /dev/null
+++ b/providers/nvidia/models/meta/esm2-650m.toml
@@ -0,0 +1,20 @@
+name = "esm2-650m"
+release_date = "2024-08-29"
+last_updated = "2025-03-10"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/meta/esmfold.toml b/providers/nvidia/models/meta/esmfold.toml
new file mode 100644
index 000000000..583e9e477
--- /dev/null
+++ b/providers/nvidia/models/meta/esmfold.toml
@@ -0,0 +1,20 @@
+name = "esmfold"
+release_date = "2024-03-15"
+last_updated = "2025-06-12"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/meta/llama-3.1-405b-instruct.toml b/providers/nvidia/models/meta/llama-3.1-405b-instruct.toml
deleted file mode 100644
index 6ae93e51c..000000000
--- a/providers/nvidia/models/meta/llama-3.1-405b-instruct.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Llama 3.1 405b Instruct"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2024-07-16"
-last_updated = "2024-07-16"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/meta/llama-3.1-8b-instruct.toml b/providers/nvidia/models/meta/llama-3.1-8b-instruct.toml
new file mode 100644
index 000000000..f9d66456c
--- /dev/null
+++ b/providers/nvidia/models/meta/llama-3.1-8b-instruct.toml
@@ -0,0 +1,22 @@
+name = "Llama 3.1 8B Instruct"
+family = "llama"
+release_date = "2025-01-01"
+last_updated = "2025-01-01"
+attachment = false
+reasoning = false
+temperature = true
+knowledge = "2023-12"
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 16_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/meta/llama-3.2-3b-instruct.toml b/providers/nvidia/models/meta/llama-3.2-3b-instruct.toml
new file mode 100644
index 000000000..92bcf2657
--- /dev/null
+++ b/providers/nvidia/models/meta/llama-3.2-3b-instruct.toml
@@ -0,0 +1,22 @@
+name = "Llama 3.2 3B Instruct"
+family = "llama"
+release_date = "2024-09-18"
+last_updated = "2024-09-18"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = false
+structured_output = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 32_768
+output = 32_000
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/meta/llama-3.2-90b-vision-instruct.toml b/providers/nvidia/models/meta/llama-3.2-90b-vision-instruct.toml
new file mode 100644
index 000000000..a9d4a5359
--- /dev/null
+++ b/providers/nvidia/models/meta/llama-3.2-90b-vision-instruct.toml
@@ -0,0 +1,22 @@
+name = "Llama-3.2-90B-Vision-Instruct"
+family = "llama"
+release_date = "2024-09-25"
+last_updated = "2024-09-25"
+attachment = true
+reasoning = false
+temperature = true
+knowledge = "2023-12"
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
diff --git a/providers/nvidia/models/meta/llama-4-scout-17b-16e-instruct.toml b/providers/nvidia/models/meta/llama-4-scout-17b-16e-instruct.toml
deleted file mode 100644
index 32c09ba2c..000000000
--- a/providers/nvidia/models/meta/llama-4-scout-17b-16e-instruct.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "Llama 4 Scout 17b 16e Instruct"
-attachment = true
-reasoning = false
-temperature = true
-knowledge = "2024-02"
-tool_call = true
-structured_output = true
-release_date = "2025-04-02"
-last_updated = "2025-04-02"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text","image"]
-output = ["text"]
diff --git a/providers/nvidia/models/meta/llama-guard-4-12b.toml b/providers/nvidia/models/meta/llama-guard-4-12b.toml
new file mode 100644
index 000000000..7dd608d72
--- /dev/null
+++ b/providers/nvidia/models/meta/llama-guard-4-12b.toml
@@ -0,0 +1,21 @@
+name = "Llama Guard 4 12B"
+family = "llama"
+release_date = "2025-04-05"
+last_updated = "2026-04-30"
+attachment = true
+reasoning = false
+temperature = true
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 16_384
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
diff --git a/providers/nvidia/models/microsoft/phi-3-medium-128k-instruct.toml b/providers/nvidia/models/microsoft/phi-3-medium-128k-instruct.toml
deleted file mode 100644
index 3dd52a2c6..000000000
--- a/providers/nvidia/models/microsoft/phi-3-medium-128k-instruct.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "Phi 3 Medium 128k Instruct"
-attachment = true
-reasoning = false
-temperature = true
-knowledge = "2023-10"
-tool_call = true
-structured_output = true
-release_date = "2024-05-07"
-last_updated = "2024-05-07"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text","image"]
-output = ["text"]
diff --git a/providers/nvidia/models/microsoft/phi-3-medium-4k-instruct.toml b/providers/nvidia/models/microsoft/phi-3-medium-4k-instruct.toml
deleted file mode 100644
index ef9a99bde..000000000
--- a/providers/nvidia/models/microsoft/phi-3-medium-4k-instruct.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "Phi 3 Medium 4k Instruct"
-attachment = true
-reasoning = false
-temperature = true
-knowledge = "2023-10"
-tool_call = true
-structured_output = true
-release_date = "2024-05-07"
-last_updated = "2024-05-07"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 4000
-output = 4096
-
-[modalities]
-input = ["text","image"]
-output = ["text"]
diff --git a/providers/nvidia/models/microsoft/phi-3-small-128k-instruct.toml b/providers/nvidia/models/microsoft/phi-3-small-128k-instruct.toml
deleted file mode 100644
index 85f8b1c64..000000000
--- a/providers/nvidia/models/microsoft/phi-3-small-128k-instruct.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "Phi 3 Small 128k Instruct"
-attachment = true
-reasoning = false
-temperature = true
-knowledge = "2023-10"
-tool_call = true
-structured_output = true
-release_date = "2024-05-07"
-last_updated = "2024-05-07"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text","image"]
-output = ["text"]
diff --git a/providers/nvidia/models/microsoft/phi-3-small-8k-instruct.toml b/providers/nvidia/models/microsoft/phi-3-small-8k-instruct.toml
deleted file mode 100644
index c05bda32e..000000000
--- a/providers/nvidia/models/microsoft/phi-3-small-8k-instruct.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "Phi 3 Small 8k Instruct"
-attachment = true
-reasoning = false
-temperature = true
-knowledge = "2023-10"
-tool_call = true
-structured_output = true
-release_date = "2024-05-07"
-last_updated = "2024-05-07"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 8000
-output = 4096
-
-[modalities]
-input = ["text","image"]
-output = ["text"]
diff --git a/providers/nvidia/models/microsoft/phi-3-vision-128k-instruct.toml b/providers/nvidia/models/microsoft/phi-3-vision-128k-instruct.toml
deleted file mode 100644
index 06a8d3767..000000000
--- a/providers/nvidia/models/microsoft/phi-3-vision-128k-instruct.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Phi 3 Vision 128k Instruct"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2024-05-19"
-last_updated = "2024-05-19"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text","image"]
-output = ["text"]
diff --git a/providers/nvidia/models/microsoft/phi-3.5-moe-instruct.toml b/providers/nvidia/models/microsoft/phi-3.5-moe-instruct.toml
deleted file mode 100644
index 000096966..000000000
--- a/providers/nvidia/models/microsoft/phi-3.5-moe-instruct.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Phi 3.5 Moe Instruct"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2024-08-17"
-last_updated = "2024-08-17"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/microsoft/phi-3.5-vision-instruct.toml b/providers/nvidia/models/microsoft/phi-3.5-vision-instruct.toml
deleted file mode 100644
index fe397e248..000000000
--- a/providers/nvidia/models/microsoft/phi-3.5-vision-instruct.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Phi 3.5 Vision Instruct"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2024-08-16"
-last_updated = "2024-08-16"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text","image"]
-output = ["text"]
diff --git a/providers/nvidia/models/microsoft/phi-4-multimodal-instruct.toml b/providers/nvidia/models/microsoft/phi-4-multimodal-instruct.toml
new file mode 100644
index 000000000..975fbc15f
--- /dev/null
+++ b/providers/nvidia/models/microsoft/phi-4-multimodal-instruct.toml
@@ -0,0 +1,21 @@
+name = "Phi 4 Multimodal"
+release_date = "2025-07-26"
+last_updated = "2025-07-26"
+attachment = false
+reasoning = false
+tool_call = false
+structured_output = false
+open_weights = false
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+input = 128_000
+output = 16_384
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/mistralai/codestral-22b-instruct-v0.1.toml b/providers/nvidia/models/mistralai/codestral-22b-instruct-v0.1.toml
deleted file mode 100644
index cb47b5c50..000000000
--- a/providers/nvidia/models/mistralai/codestral-22b-instruct-v0.1.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Codestral 22b Instruct V0.1"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2024-05-29"
-last_updated = "2024-05-29"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/mistralai/magistral-small-2506.toml b/providers/nvidia/models/mistralai/magistral-small-2506.toml
new file mode 100644
index 000000000..f1cc35f49
--- /dev/null
+++ b/providers/nvidia/models/mistralai/magistral-small-2506.toml
@@ -0,0 +1,21 @@
+name = "Magistral Small 2506"
+release_date = "2025-09-25"
+last_updated = "2025-09-25"
+attachment = false
+reasoning = false
+tool_call = false
+structured_output = false
+open_weights = false
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 32_768
+input = 32_768
+output = 32_768
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/mistralai/mamba-codestral-7b-v0.1.toml b/providers/nvidia/models/mistralai/mamba-codestral-7b-v0.1.toml
deleted file mode 100644
index c7c426c24..000000000
--- a/providers/nvidia/models/mistralai/mamba-codestral-7b-v0.1.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Mamba Codestral 7b V0.1"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = false
-structured_output = false
-release_date = "2024-07-16"
-last_updated = "2024-07-16"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/meta/llama3-70b-instruct.toml b/providers/nvidia/models/mistralai/mistral-7b-instruct-v03.toml
similarity index 55%
rename from providers/nvidia/models/meta/llama3-70b-instruct.toml
rename to providers/nvidia/models/mistralai/mistral-7b-instruct-v03.toml
index c36500863..046e0f198 100644
--- a/providers/nvidia/models/meta/llama3-70b-instruct.toml
+++ b/providers/nvidia/models/mistralai/mistral-7b-instruct-v03.toml
@@ -1,20 +1,20 @@
-name = "Llama3 70b Instruct"
+name = "Mistral-7B-Instruct-v0.3"
+release_date = "2025-04-01"
+last_updated = "2025-04-01"
 attachment = false
 reasoning = false
 temperature = true
 tool_call = true
 structured_output = true
-release_date = "2024-04-17"
-last_updated = "2024-04-17"
 open_weights = true
 
 [cost]
-input = 0.00
-output = 0.00
+input = 0.0
+output = 0.0
 
 [limit]
-context = 128000
-output = 4096
+context = 65_536
+output = 65_536
 
 [modalities]
 input = ["text"]
diff --git a/providers/nvidia/models/mistralai/mistral-large-2-instruct.toml b/providers/nvidia/models/mistralai/mistral-large-2-instruct.toml
deleted file mode 100644
index 3a754ff2c..000000000
--- a/providers/nvidia/models/mistralai/mistral-large-2-instruct.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Mistral Large 2 Instruct"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2024-07-24"
-last_updated = "2024-07-24"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/mistralai/mistral-medium-3-instruct.toml b/providers/nvidia/models/mistralai/mistral-medium-3-instruct.toml
new file mode 100644
index 000000000..82c3f2c8a
--- /dev/null
+++ b/providers/nvidia/models/mistralai/mistral-medium-3-instruct.toml
@@ -0,0 +1,22 @@
+name = "Mistral Medium 3"
+family = "mistral-medium"
+release_date = "2025-09-25"
+last_updated = "2025-09-25"
+attachment = true
+reasoning = false
+tool_call = false
+structured_output = false
+open_weights = false
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 131_072
+input = 131_072
+output = 32_768
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
diff --git a/providers/nvidia/models/mistralai/mistral-medium-3.5-128b.toml b/providers/nvidia/models/mistralai/mistral-medium-3.5-128b.toml
deleted file mode 100644
index 8c3050905..000000000
--- a/providers/nvidia/models/mistralai/mistral-medium-3.5-128b.toml
+++ /dev/null
@@ -1,8 +0,0 @@
-name = "Mistral Medium 3.5 128B"
-
-[extends]
-from = "mistral/mistral-medium-2604"
-
-[cost]
-input = 0.0
-output = 0.0
diff --git a/providers/nvidia/models/mistralai/mistral-nemotron.toml b/providers/nvidia/models/mistralai/mistral-nemotron.toml
new file mode 100644
index 000000000..3b6596259
--- /dev/null
+++ b/providers/nvidia/models/mistralai/mistral-nemotron.toml
@@ -0,0 +1,20 @@
+name = "mistral-nemotron"
+release_date = "2025-06-11"
+last_updated = "2025-06-12"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/mistralai/mistral-small-3.1-24b-instruct-2503.toml b/providers/nvidia/models/mistralai/mistral-small-3.1-24b-instruct-2503.toml
deleted file mode 100644
index 8c2578dcf..000000000
--- a/providers/nvidia/models/mistralai/mistral-small-3.1-24b-instruct-2503.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Mistral Small 3.1 24b Instruct 2503"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2025-03-11"
-last_updated = "2025-03-11"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/mistralai/mistral-small-4-119b-2603.toml b/providers/nvidia/models/mistralai/mistral-small-4-119b-2603.toml
new file mode 100644
index 000000000..d2d240011
--- /dev/null
+++ b/providers/nvidia/models/mistralai/mistral-small-4-119b-2603.toml
@@ -0,0 +1,20 @@
+name = "mistral-small-4-119b-2603"
+release_date = "2026-03-16"
+last_updated = "2026-03-16"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/meta/llama3-8b-instruct.toml b/providers/nvidia/models/mistralai/mixtral-8x22b-instruct.toml
similarity index 65%
rename from providers/nvidia/models/meta/llama3-8b-instruct.toml
rename to providers/nvidia/models/mistralai/mixtral-8x22b-instruct.toml
index 9c3c20ca6..7e4282fa7 100644
--- a/providers/nvidia/models/meta/llama3-8b-instruct.toml
+++ b/providers/nvidia/models/mistralai/mixtral-8x22b-instruct.toml
@@ -1,20 +1,19 @@
-name = "Llama3 8b Instruct"
+name = "Mistral: Mixtral 8x22B Instruct"
+release_date = "2024-04-17"
+last_updated = "2024-04-17"
 attachment = false
 reasoning = false
 temperature = true
 tool_call = true
-structured_output = true
-release_date = "2024-04-17"
-last_updated = "2024-04-17"
 open_weights = true
 
 [cost]
-input = 0.00
-output = 0.00
+input = 0.0
+output = 0.0
 
 [limit]
-context = 128000
-output = 4096
+context = 65_536
+output = 13_108
 
 [modalities]
 input = ["text"]
diff --git a/providers/nvidia/models/mistralai/mixtral-8x7b-instruct.toml b/providers/nvidia/models/mistralai/mixtral-8x7b-instruct.toml
new file mode 100644
index 000000000..3c2c2607a
--- /dev/null
+++ b/providers/nvidia/models/mistralai/mixtral-8x7b-instruct.toml
@@ -0,0 +1,20 @@
+name = "Mistral: Mixtral 8x7B Instruct"
+release_date = "2023-12-10"
+last_updated = "2026-03-15"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 32_768
+output = 16_384
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/moonshotai/kimi-k2.5.toml b/providers/nvidia/models/moonshotai/kimi-k2.5.toml
deleted file mode 100644
index 5a3cd48cd..000000000
--- a/providers/nvidia/models/moonshotai/kimi-k2.5.toml
+++ /dev/null
@@ -1,26 +0,0 @@
-name = "Kimi K2.5"
-family = "kimi"
-release_date = "2026-01-27"
-last_updated = "2026-01-27"
-attachment = true
-reasoning = true
-temperature = true
-tool_call = true
-knowledge = "2025-07"
-open_weights = true
-
-[interleaved]
-field = "reasoning_content"
-
-[cost]
-input = 0.0
-output = 0.0
-
-[limit]
-context = 262_144
-output = 262_144
-
-[modalities]
-input = ["text", "image", "video"]
-output = ["text"]
-
diff --git a/providers/nvidia/models/nvidia/active-speaker-detection.toml b/providers/nvidia/models/nvidia/active-speaker-detection.toml
new file mode 100644
index 000000000..07de84258
--- /dev/null
+++ b/providers/nvidia/models/nvidia/active-speaker-detection.toml
@@ -0,0 +1,20 @@
+name = "Active Speaker Detection"
+release_date = "2026-04-16"
+last_updated = "2026-04-16"
+attachment = true
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 0
+output = 4_096
+
+[modalities]
+input = ["video"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/bevformer.toml b/providers/nvidia/models/nvidia/bevformer.toml
new file mode 100644
index 000000000..12f4584eb
--- /dev/null
+++ b/providers/nvidia/models/nvidia/bevformer.toml
@@ -0,0 +1,20 @@
+name = "bevformer"
+release_date = "2025-03-18"
+last_updated = "2025-07-20"
+attachment = true
+reasoning = false
+temperature = true
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["video"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/cosmos-nemotron-34b.toml b/providers/nvidia/models/nvidia/cosmos-nemotron-34b.toml
deleted file mode 100644
index c0236140f..000000000
--- a/providers/nvidia/models/nvidia/cosmos-nemotron-34b.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "Cosmos Nemotron 34B"
-family = "nemotron"
-release_date = "2024-01-01"
-last_updated = "2025-09-05"
-attachment = false
-reasoning = true
-temperature = true
-knowledge = "2024-01"
-tool_call = false
-open_weights = false
-
-[cost]
-input = 0.0
-output = 0.0
-
-[limit]
-context = 131_072
-output = 8_192
-
-[modalities]
-input = ["text", "image", "video"]
-output = ["text"]
\ No newline at end of file
diff --git a/providers/nvidia/models/nvidia/cosmos-predict1-5b.toml b/providers/nvidia/models/nvidia/cosmos-predict1-5b.toml
new file mode 100644
index 000000000..a88700a24
--- /dev/null
+++ b/providers/nvidia/models/nvidia/cosmos-predict1-5b.toml
@@ -0,0 +1,20 @@
+name = "cosmos-predict1-5b"
+release_date = "2025-03-18"
+last_updated = "2025-03-18"
+attachment = true
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 0
+output = 4_096
+
+[modalities]
+input = ["text", "image", "video"]
+output = ["video"]
diff --git a/providers/nvidia/models/nvidia/cosmos-transfer1-7b.toml b/providers/nvidia/models/nvidia/cosmos-transfer1-7b.toml
new file mode 100644
index 000000000..a13eb7b79
--- /dev/null
+++ b/providers/nvidia/models/nvidia/cosmos-transfer1-7b.toml
@@ -0,0 +1,20 @@
+name = "cosmos-transfer1-7b"
+release_date = "2025-06-13"
+last_updated = "2025-06-30"
+attachment = true
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 0
+output = 4_096
+
+[modalities]
+input = ["text", "image", "video"]
+output = ["video"]
diff --git a/providers/nvidia/models/nvidia/cosmos-transfer2_5-2b.toml b/providers/nvidia/models/nvidia/cosmos-transfer2_5-2b.toml
new file mode 100644
index 000000000..4ca125989
--- /dev/null
+++ b/providers/nvidia/models/nvidia/cosmos-transfer2_5-2b.toml
@@ -0,0 +1,20 @@
+name = "cosmos-transfer2.5-2b"
+release_date = "2026-02-26"
+last_updated = "2026-02-26"
+attachment = true
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 0
+output = 4_096
+
+[modalities]
+input = ["text", "image", "video"]
+output = ["video"]
diff --git a/providers/nvidia/models/nvidia/gliner-pii.toml b/providers/nvidia/models/nvidia/gliner-pii.toml
new file mode 100644
index 000000000..cbfcda621
--- /dev/null
+++ b/providers/nvidia/models/nvidia/gliner-pii.toml
@@ -0,0 +1,20 @@
+name = "gliner-pii"
+release_date = "2026-03-03"
+last_updated = "2026-03-03"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/llama-3.1-nemotron-70b-instruct.toml b/providers/nvidia/models/nvidia/llama-3.1-nemotron-70b-instruct.toml
deleted file mode 100644
index 559a42755..000000000
--- a/providers/nvidia/models/nvidia/llama-3.1-nemotron-70b-instruct.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Llama 3.1 Nemotron 70b Instruct"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2024-10-12"
-last_updated = "2024-10-12"
-open_weights = false
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/nvidia/llama-3.1-nemotron-ultra-253b-v1.toml b/providers/nvidia/models/nvidia/llama-3.1-nemotron-ultra-253b-v1.toml
deleted file mode 100644
index 055186b84..000000000
--- a/providers/nvidia/models/nvidia/llama-3.1-nemotron-ultra-253b-v1.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "Llama-3.1-Nemotron-Ultra-253B-v1"
-family = "llama"
-release_date = "2024-07-01"
-last_updated = "2025-09-05"
-attachment = false
-reasoning = true
-temperature = true
-knowledge = "2024-07"
-tool_call = true
-open_weights = false
-
-[cost]
-input = 0.0
-output = 0.0
-
-[limit]
-context = 131_072
-output = 8_192
-
-[modalities]
-input = ["text"]
-output = ["text"]
\ No newline at end of file
diff --git a/providers/nvidia/models/nvidia/llama-3_1-nemotron-safety-guard-8b-v3.toml b/providers/nvidia/models/nvidia/llama-3_1-nemotron-safety-guard-8b-v3.toml
new file mode 100644
index 000000000..8eeff11e1
--- /dev/null
+++ b/providers/nvidia/models/nvidia/llama-3_1-nemotron-safety-guard-8b-v3.toml
@@ -0,0 +1,20 @@
+name = "llama-3.1-nemotron-safety-guard-8b-v3"
+release_date = "2025-10-28"
+last_updated = "2025-10-28"
+attachment = false
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/llama-embed-nemotron-8b.toml b/providers/nvidia/models/nvidia/llama-3_2-nemoretriever-300m-embed-v1.toml
similarity index 52%
rename from providers/nvidia/models/nvidia/llama-embed-nemotron-8b.toml
rename to providers/nvidia/models/nvidia/llama-3_2-nemoretriever-300m-embed-v1.toml
index 237b83a0c..d8b54900e 100644
--- a/providers/nvidia/models/nvidia/llama-embed-nemotron-8b.toml
+++ b/providers/nvidia/models/nvidia/llama-3_2-nemoretriever-300m-embed-v1.toml
@@ -1,13 +1,11 @@
-name = "Llama Embed Nemotron 8B"
-family = "llama"
-release_date = "2025-03-18"
-last_updated = "2025-03-18"
+name = "llama-3_2-nemoretriever-300m-embed-v1"
+release_date = "2025-07-24"
+last_updated = "2025-07-24"
 attachment = false
 reasoning = false
 temperature = false
-knowledge = "2025-03"
 tool_call = false
-open_weights = false
+open_weights = true
 
 [cost]
 input = 0.0
@@ -19,4 +17,4 @@ output = 2_048
 
 [modalities]
 input = ["text"]
-output = ["text"]
\ No newline at end of file
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/llama-nemotron-embed-vl-1b-v2.toml b/providers/nvidia/models/nvidia/llama-nemotron-embed-vl-1b-v2.toml
new file mode 100644
index 000000000..6f0355b42
--- /dev/null
+++ b/providers/nvidia/models/nvidia/llama-nemotron-embed-vl-1b-v2.toml
@@ -0,0 +1,20 @@
+name = "llama-nemotron-embed-vl-1b-v2"
+release_date = "2026-02-10"
+last_updated = "2026-02-10"
+attachment = true
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 32_768
+output = 2_048
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/llama-nemotron-rerank-vl-1b-v2.toml b/providers/nvidia/models/nvidia/llama-nemotron-rerank-vl-1b-v2.toml
new file mode 100644
index 000000000..87ef037fa
--- /dev/null
+++ b/providers/nvidia/models/nvidia/llama-nemotron-rerank-vl-1b-v2.toml
@@ -0,0 +1,20 @@
+name = "llama-nemotron-rerank-vl-1b-v2"
+release_date = "2026-03-31"
+last_updated = "2026-03-31"
+attachment = true
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 4_096
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/llama3-chatqa-1.5-70b.toml b/providers/nvidia/models/nvidia/llama3-chatqa-1.5-70b.toml
deleted file mode 100644
index 5bef2305b..000000000
--- a/providers/nvidia/models/nvidia/llama3-chatqa-1.5-70b.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Llama3 Chatqa 1.5 70b"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2024-04-28"
-last_updated = "2024-04-28"
-open_weights = false
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/nvidia/magpie-tts-zeroshot.toml b/providers/nvidia/models/nvidia/magpie-tts-zeroshot.toml
new file mode 100644
index 000000000..2b3b93ab0
--- /dev/null
+++ b/providers/nvidia/models/nvidia/magpie-tts-zeroshot.toml
@@ -0,0 +1,20 @@
+name = "magpie-tts-zeroshot"
+release_date = "2025-05-22"
+last_updated = "2025-06-12"
+attachment = true
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 0
+output = 4_096
+
+[modalities]
+input = ["text", "audio"]
+output = ["audio"]
diff --git a/providers/nvidia/models/nvidia/nemoretriever-ocr-v1.toml b/providers/nvidia/models/nvidia/nemoretriever-ocr-v1.toml
deleted file mode 100644
index 9f26895d9..000000000
--- a/providers/nvidia/models/nvidia/nemoretriever-ocr-v1.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "NeMo Retriever OCR v1"
-family = "nemoretriever"
-release_date = "2024-01-01"
-last_updated = "2025-09-05"
-attachment = false
-reasoning = false
-temperature = false
-knowledge = "2024-01"
-tool_call = false
-open_weights = false
-
-[cost]
-input = 0.0
-output = 0.0
-
-[limit]
-context = 0
-output = 4096
-
-[modalities]
-input = ["image"]
-output = ["text"]
\ No newline at end of file
diff --git a/providers/nvidia/models/nvidia/nemotron-3-content-safety.toml b/providers/nvidia/models/nvidia/nemotron-3-content-safety.toml
new file mode 100644
index 000000000..41e3ee707
--- /dev/null
+++ b/providers/nvidia/models/nvidia/nemotron-3-content-safety.toml
@@ -0,0 +1,20 @@
+name = "nemotron-3-content-safety"
+release_date = "2026-04-16"
+last_updated = "2026-04-16"
+attachment = false
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/nemotron-4-340b-instruct.toml b/providers/nvidia/models/nvidia/nemotron-4-340b-instruct.toml
deleted file mode 100644
index db129f998..000000000
--- a/providers/nvidia/models/nvidia/nemotron-4-340b-instruct.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Nemotron 4 340b Instruct"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2024-06-13"
-last_updated = "2024-06-13"
-open_weights = false
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/nvidia/nemotron-content-safety-reasoning-4b.toml b/providers/nvidia/models/nvidia/nemotron-content-safety-reasoning-4b.toml
new file mode 100644
index 000000000..43a2a7897
--- /dev/null
+++ b/providers/nvidia/models/nvidia/nemotron-content-safety-reasoning-4b.toml
@@ -0,0 +1,20 @@
+name = "nemotron-content-safety-reasoning-4b"
+release_date = "2026-01-22"
+last_updated = "2026-01-22"
+attachment = false
+reasoning = true
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/nemotron-mini-4b-instruct.toml b/providers/nvidia/models/nvidia/nemotron-mini-4b-instruct.toml
new file mode 100644
index 000000000..1cf210303
--- /dev/null
+++ b/providers/nvidia/models/nvidia/nemotron-mini-4b-instruct.toml
@@ -0,0 +1,20 @@
+name = "nemotron-mini-4b-instruct"
+release_date = "2024-08-21"
+last_updated = "2024-08-26"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/mistralai/ministral-14b-instruct-2512.toml b/providers/nvidia/models/nvidia/nemotron-nano-12b-v2-vl.toml
similarity index 51%
rename from providers/nvidia/models/mistralai/ministral-14b-instruct-2512.toml
rename to providers/nvidia/models/nvidia/nemotron-nano-12b-v2-vl.toml
index 6209d1109..65df14a01 100644
--- a/providers/nvidia/models/mistralai/ministral-14b-instruct-2512.toml
+++ b/providers/nvidia/models/nvidia/nemotron-nano-12b-v2-vl.toml
@@ -1,13 +1,12 @@
-name = "Ministral 3 14B Instruct 2512"
-family = "ministral"
+name = "Nemotron Nano 12B v2 VL"
+family = "nemotron"
+release_date = "2025-12-01"
+last_updated = "2026-04-30"
 attachment = true
-reasoning = false
-tool_call = true
-structured_output = true
+reasoning = true
 temperature = true
-knowledge = "2025-12"
-release_date = "2025-12-01"
-last_updated = "2025-12-08"
+knowledge = "2024-10"
+tool_call = true
 open_weights = true
 
 [cost]
@@ -15,8 +14,8 @@ input = 0.0
 output = 0.0
 
 [limit]
-context = 262_144
-output = 262_144
+context = 128_000
+output = 16_384
 
 [modalities]
 input = ["text", "image"]
diff --git a/providers/nvidia/models/nvidia/nemotron-voicechat.toml b/providers/nvidia/models/nvidia/nemotron-voicechat.toml
new file mode 100644
index 000000000..9889f4cf4
--- /dev/null
+++ b/providers/nvidia/models/nvidia/nemotron-voicechat.toml
@@ -0,0 +1,20 @@
+name = "nemotron-voicechat"
+release_date = "2026-03-16"
+last_updated = "2026-03-16"
+attachment = true
+reasoning = false
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["text", "audio"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/nv-embed-v1.toml b/providers/nvidia/models/nvidia/nv-embed-v1.toml
new file mode 100644
index 000000000..9d5bcf6c0
--- /dev/null
+++ b/providers/nvidia/models/nvidia/nv-embed-v1.toml
@@ -0,0 +1,20 @@
+name = "nv-embed-v1"
+release_date = "2024-06-07"
+last_updated = "2025-07-22"
+attachment = false
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 32_768
+output = 2_048
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/nv-embedcode-7b-v1.toml b/providers/nvidia/models/nvidia/nv-embedcode-7b-v1.toml
new file mode 100644
index 000000000..2063d1700
--- /dev/null
+++ b/providers/nvidia/models/nvidia/nv-embedcode-7b-v1.toml
@@ -0,0 +1,20 @@
+name = "nv-embedcode-7b-v1"
+release_date = "2025-03-17"
+last_updated = "2025-05-29"
+attachment = false
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 32_768
+output = 2_048
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/parakeet-tdt-0.6b-v2.toml b/providers/nvidia/models/nvidia/parakeet-tdt-0.6b-v2.toml
deleted file mode 100644
index 18a253f98..000000000
--- a/providers/nvidia/models/nvidia/parakeet-tdt-0.6b-v2.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "Parakeet TDT 0.6B v2"
-family = "parakeet"
-release_date = "2024-01-01"
-last_updated = "2025-09-05"
-attachment = false
-reasoning = false
-temperature = false
-knowledge = "2024-01"
-tool_call = false
-open_weights = false
-
-[cost]
-input = 0.0
-output = 0.0
-
-[limit]
-context = 0
-output = 4096
-
-[modalities]
-input = ["audio"]
-output = ["text"]
\ No newline at end of file
diff --git a/providers/nvidia/models/nvidia/rerank-qa-mistral-4b.toml b/providers/nvidia/models/nvidia/rerank-qa-mistral-4b.toml
new file mode 100644
index 000000000..7e26db372
--- /dev/null
+++ b/providers/nvidia/models/nvidia/rerank-qa-mistral-4b.toml
@@ -0,0 +1,20 @@
+name = "rerank-qa-mistral-4b"
+release_date = "2024-03-17"
+last_updated = "2025-01-17"
+attachment = false
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/riva-translate-4b-instruct-v1_1.toml b/providers/nvidia/models/nvidia/riva-translate-4b-instruct-v1_1.toml
new file mode 100644
index 000000000..da0affc1d
--- /dev/null
+++ b/providers/nvidia/models/nvidia/riva-translate-4b-instruct-v1_1.toml
@@ -0,0 +1,20 @@
+name = "riva-translate-4b-instruct-v1_1"
+release_date = "2025-12-12"
+last_updated = "2025-12-12"
+attachment = false
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/sparsedrive.toml b/providers/nvidia/models/nvidia/sparsedrive.toml
new file mode 100644
index 000000000..ca6c53667
--- /dev/null
+++ b/providers/nvidia/models/nvidia/sparsedrive.toml
@@ -0,0 +1,20 @@
+name = "sparsedrive"
+release_date = "2025-03-18"
+last_updated = "2025-07-20"
+attachment = true
+reasoning = false
+temperature = true
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["video"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/streampetr.toml b/providers/nvidia/models/nvidia/streampetr.toml
new file mode 100644
index 000000000..df147fa72
--- /dev/null
+++ b/providers/nvidia/models/nvidia/streampetr.toml
@@ -0,0 +1,20 @@
+name = "streampetr"
+release_date = "2025-11-13"
+last_updated = "2025-11-13"
+attachment = true
+reasoning = false
+temperature = true
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["video"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/studiovoice.toml b/providers/nvidia/models/nvidia/studiovoice.toml
new file mode 100644
index 000000000..07f672842
--- /dev/null
+++ b/providers/nvidia/models/nvidia/studiovoice.toml
@@ -0,0 +1,20 @@
+name = "studiovoice"
+release_date = "2024-10-03"
+last_updated = "2025-06-13"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/synthetic-video-detector.toml b/providers/nvidia/models/nvidia/synthetic-video-detector.toml
new file mode 100644
index 000000000..08fdf583a
--- /dev/null
+++ b/providers/nvidia/models/nvidia/synthetic-video-detector.toml
@@ -0,0 +1,20 @@
+name = "synthetic-video-detector"
+release_date = "2026-04-16"
+last_updated = "2026-04-16"
+attachment = true
+reasoning = false
+temperature = true
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 0
+output = 4_096
+
+[modalities]
+input = ["video"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/usdcode.toml b/providers/nvidia/models/nvidia/usdcode.toml
new file mode 100644
index 000000000..977eec10c
--- /dev/null
+++ b/providers/nvidia/models/nvidia/usdcode.toml
@@ -0,0 +1,20 @@
+name = "usdcode"
+release_date = "2026-01-01"
+last_updated = "2026-01-01"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = false
+open_weights = false
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/usdvalidate.toml b/providers/nvidia/models/nvidia/usdvalidate.toml
new file mode 100644
index 000000000..7149c82e2
--- /dev/null
+++ b/providers/nvidia/models/nvidia/usdvalidate.toml
@@ -0,0 +1,20 @@
+name = "usdvalidate"
+release_date = "2024-07-24"
+last_updated = "2025-01-08"
+attachment = false
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 0
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/z-ai/glm5.toml b/providers/nvidia/models/openai/gpt-oss-20b.toml
similarity index 56%
rename from providers/nvidia/models/z-ai/glm5.toml
rename to providers/nvidia/models/openai/gpt-oss-20b.toml
index 6d484070f..488196695 100644
--- a/providers/nvidia/models/z-ai/glm5.toml
+++ b/providers/nvidia/models/openai/gpt-oss-20b.toml
@@ -1,7 +1,7 @@
-name = "GLM5"
-family = "glm"
-release_date = "2026-02-12"
-last_updated = "2026-02-12"
+name = "GPT OSS 20B"
+family = "gpt-oss"
+release_date = "2025-08-05"
+last_updated = "2025-08-05"
 attachment = false
 reasoning = true
 temperature = true
@@ -9,16 +9,13 @@ tool_call = true
 structured_output = true
 open_weights = true
 
-[interleaved]
-field = "reasoning_content"
-
 [cost]
 input = 0.0
 output = 0.0
 
 [limit]
-context = 202752
-output = 131000
+context = 131_072
+output = 32_768
 
 [modalities]
 input = ["text"]
diff --git a/providers/nvidia/models/qwen/qwen2.5-coder-7b-instruct.toml b/providers/nvidia/models/qwen/qwen2.5-coder-7b-instruct.toml
deleted file mode 100644
index 269dbbf8b..000000000
--- a/providers/nvidia/models/qwen/qwen2.5-coder-7b-instruct.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Qwen2.5 Coder 7b Instruct"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2024-09-17"
-last_updated = "2024-09-17"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/qwen/qwen3-235b-a22b.toml b/providers/nvidia/models/qwen/qwen3-235b-a22b.toml
deleted file mode 100644
index 1b2875b46..000000000
--- a/providers/nvidia/models/qwen/qwen3-235b-a22b.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "Qwen3-235B-A22B"
-family = "qwen"
-release_date = "2024-12-01"
-last_updated = "2025-09-05"
-attachment = false
-reasoning = true
-temperature = true
-knowledge = "2024-12"
-tool_call = true
-open_weights = false
-
-[cost]
-input = 0.0
-output = 0.0
-
-[limit]
-context = 131_072
-output = 8_192
-
-[modalities]
-input = ["text"]
-output = ["text"]
\ No newline at end of file
diff --git a/providers/nvidia/models/qwen/qwen3.5-122b-a10b.toml b/providers/nvidia/models/qwen/qwen3.5-122b-a10b.toml
new file mode 100644
index 000000000..2e3995a2d
--- /dev/null
+++ b/providers/nvidia/models/qwen/qwen3.5-122b-a10b.toml
@@ -0,0 +1,22 @@
+name = "Qwen3.5 122B-A10B"
+family = "qwen"
+release_date = "2026-02-23"
+last_updated = "2026-02-23"
+attachment = true
+reasoning = true
+temperature = true
+tool_call = true
+structured_output = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 262_144
+output = 65_536
+
+[modalities]
+input = ["text", "image", "video", "audio"]
+output = ["text"]
diff --git a/providers/nvidia/models/qwen/qwq-32b.toml b/providers/nvidia/models/qwen/qwq-32b.toml
deleted file mode 100644
index 15ebe81d4..000000000
--- a/providers/nvidia/models/qwen/qwq-32b.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Qwq 32b"
-attachment = false
-reasoning = true
-temperature = true
-tool_call = false
-structured_output = false
-release_date = "2025-03-05"
-last_updated = "2025-03-05"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/sarvamai/sarvam-m.toml b/providers/nvidia/models/sarvamai/sarvam-m.toml
new file mode 100644
index 000000000..92dac313e
--- /dev/null
+++ b/providers/nvidia/models/sarvamai/sarvam-m.toml
@@ -0,0 +1,20 @@
+name = "sarvam-m"
+release_date = "2025-07-25"
+last_updated = "2025-07-25"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/upstage/solar-10.7b-instruct.toml b/providers/nvidia/models/upstage/solar-10.7b-instruct.toml
new file mode 100644
index 000000000..62e245ed2
--- /dev/null
+++ b/providers/nvidia/models/upstage/solar-10.7b-instruct.toml
@@ -0,0 +1,20 @@
+name = "Solar 10.7B Instruct"
+release_date = "2024-06-05"
+last_updated = "2025-04-10"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 4_096  # SOLAR-10.7B maximum sequence length (confirm against the endpoint card)
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]

From 8e19ec580cb92d4f83f644aacb0e62d00871d316 Mon Sep 17 00:00:00 2001
From: Isaac Ng <5kahoisaac@gmail.com>
Date: Sun, 3 May 2026 15:19:17 +0800
Subject: [PATCH 2/4] =?UTF-8?q?=F0=9F=93=A6=20CHORE:=20sync=20latest=20nvi?=
 =?UTF-8?q?dia=20model?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .serena/.gitignore                            |   2 +
 .serena/project.yml                           | 119 ++++++++++++++++++
 .../nvidia/models/moonshotai/kimi-k2.6.toml   |  26 ++++
 .../llama-3.3-nemotron-super-49b-v1.toml      |  21 ----
 .../nvidia/models/qwen/qwen-image-edit.toml   |  22 ++++
 providers/nvidia/models/qwen/qwen-image.toml  |  22 ++++
 6 files changed, 191 insertions(+), 21 deletions(-)
 create mode 100644 .serena/.gitignore
 create mode 100644 .serena/project.yml
 create mode 100644 providers/nvidia/models/moonshotai/kimi-k2.6.toml
 delete mode 100644 providers/nvidia/models/nvidia/llama-3.3-nemotron-super-49b-v1.toml
 create mode 100644 providers/nvidia/models/qwen/qwen-image-edit.toml
 create mode 100644 providers/nvidia/models/qwen/qwen-image.toml

diff --git a/.serena/.gitignore b/.serena/.gitignore
new file mode 100644
index 000000000..2e510aff5
--- /dev/null
+++ b/.serena/.gitignore
@@ -0,0 +1,2 @@
+/cache
+/project.local.yml
diff --git a/.serena/project.yml b/.serena/project.yml
new file mode 100644
index 000000000..6da0e6b8b
--- /dev/null
+++ b/.serena/project.yml
@@ -0,0 +1,119 @@
+# the name by which the project can be referenced within Serena
+project_name: "models.dev"
+
+
+# list of languages for which language servers are started; choose from:
+#   al                  ansible             bash                clojure             cpp
+#   cpp_ccls            crystal             csharp              csharp_omnisharp    dart
+#   elixir              elm                 erlang              fortran             fsharp
+#   go                  groovy              haskell             haxe                hlsl
+#   java                json                julia               kotlin              lean4
+#   lua                 luau                markdown            matlab              msl
+#   nix                 ocaml               pascal              perl                php
+#   php_phpactor        powershell          python              python_jedi         python_ty
+#   r                   rego                ruby                ruby_solargraph     rust
+#   scala               solidity            swift               systemverilog       terraform
+#   toml                typescript          typescript_vts      vue                 yaml
+#   zig
+#   (This list may be outdated. For the current list, see values of Language enum here:
+#   https://github.com/oraios/serena/blob/main/src/solidlsp/ls_config.py
+#   For some languages, there are alternative language servers, e.g. csharp_omnisharp, ruby_solargraph.)
+# Note:
+#   - For C, use cpp
+#   - For JavaScript, use typescript
+#   - For Free Pascal/Lazarus, use pascal
+# Special requirements:
+#   Some languages require additional setup/installations.
+#   See here for details: https://oraios.github.io/serena/01-about/020_programming-languages.html#language-servers
+# When using multiple languages, the first language server that supports a given file will be used for that file.
+# The first language is the default language and the respective language server will be used as a fallback.
+# Note that when using the JetBrains backend, language servers are not used and this list is correspondingly ignored.
+languages:
+- typescript
+
+# the encoding used by text files in the project
+# For a list of possible encodings, see https://docs.python.org/3.11/library/codecs.html#standard-encodings
+encoding: "utf-8"
+
+# line ending convention to use when writing source files.
+# Possible values: unset (use global setting), "lf", "crlf", or "native" (platform default)
+# This does not affect Serena's own files (e.g. memories and configuration files), which always use native line endings.
+line_ending:
+
+# The language backend to use for this project.
+# If not set, the global setting from serena_config.yml is used.
+# Valid values: LSP, JetBrains
+# Note: the backend is fixed at startup. If a project with a different backend
+# is activated post-init, an error will be returned.
+language_backend:
+
+# whether to use project's .gitignore files to ignore files
+ignore_all_files_in_gitignore: true
+
+# advanced configuration option allowing to configure language server-specific options.
+# Maps the language key to the options.
+# Have a look at the docstring of the constructors of the LS implementations within solidlsp (e.g., for C# or PHP) to see which options are available.
+# No documentation on options means no options are available.
+ls_specific_settings: {}
+
+# list of additional paths to ignore in this project.
+# Same syntax as gitignore, so you can use * and **.
+# Note: global ignored_paths from serena_config.yml are also applied additively.
+ignored_paths: []
+
+# whether the project is in read-only mode
+# If set to true, all editing tools will be disabled and attempts to use them will result in an error
+# Added on 2025-04-18
+read_only: false
+
+# list of tool names to exclude.
+# This extends the existing exclusions (e.g. from the global configuration)
+# Find the list of tools here: https://oraios.github.io/serena/01-about/035_tools.html
+excluded_tools: []
+
+# list of tools to include that would otherwise be disabled (particularly optional tools that are disabled by default).
+# This extends the existing inclusions (e.g. from the global configuration).
+# Find the list of tools here: https://oraios.github.io/serena/01-about/035_tools.html
+included_optional_tools: []
+
+# fixed set of tools to use as the base tool set (if non-empty), replacing Serena's default set of tools.
+# This cannot be combined with non-empty excluded_tools or included_optional_tools.
+# Find the list of tools here: https://oraios.github.io/serena/01-about/035_tools.html
+fixed_tools: []
+
+# list of mode names that are to be activated by default, overriding the setting in the global configuration.
+# The full set of modes to be activated is base_modes (from global config) + default_modes + added_modes.
+# If the setting is undefined/empty, the default_modes from the global configuration (serena_config.yml) apply.
+# Otherwise, this overrides the setting from the global configuration (serena_config.yml).
+# Therefore, you can set this to [] if you do not want the default modes defined in the global config to apply
+# for this project.
+# This setting can, in turn, be overridden by CLI parameters (--mode).
+# See https://oraios.github.io/serena/02-usage/050_configuration.html#modes
+default_modes:
+
+# list of mode names to be activated additionally for this project, e.g. ["query-projects"]
+# The full set of modes to be activated is base_modes (from global config) + default_modes + added_modes.
+# See https://oraios.github.io/serena/02-usage/050_configuration.html#modes
+added_modes:
+
+# initial prompt for the project. It will always be given to the LLM upon activating the project
+# (contrary to the memories, which are loaded on demand).
+initial_prompt: ""
+
+# time budget (seconds) per tool call for the retrieval of additional symbol information
+# such as docstrings or parameter information.
+# This overrides the corresponding setting in the global configuration; see the documentation there.
+# If null or missing, use the setting from the global configuration.
+symbol_info_budget:
+
+# list of regex patterns which, when matched, mark a memory entry as read‑only.
+# Extends the list from the global configuration, merging the two lists.
+read_only_memory_patterns: []
+
+# list of regex patterns for memories to completely ignore.
+# Matching memories will not appear in list_memories or activate_project output
+# and cannot be accessed via read_memory or write_memory.
+# To access ignored memory files, use the read_file tool on the raw file path.
+# Extends the list from the global configuration, merging the two lists.
+# Example: ["_archive/.*", "_episodes/.*"]
+ignored_memory_patterns: []
diff --git a/providers/nvidia/models/moonshotai/kimi-k2.6.toml b/providers/nvidia/models/moonshotai/kimi-k2.6.toml
new file mode 100644
index 000000000..f89535dec
--- /dev/null
+++ b/providers/nvidia/models/moonshotai/kimi-k2.6.toml
@@ -0,0 +1,26 @@
+name = "Kimi K2.6"
+family = "kimi-k2.6"
+release_date = "2026-04-21"
+last_updated = "2026-04-21"
+attachment = true
+reasoning = true
+structured_output = true
+temperature = true
+tool_call = true
+knowledge = "2025-01"
+open_weights = true
+
+[interleaved]
+field = "reasoning_content"
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 262_144
+output = 262_144
+
+[modalities]
+input = ["text", "image", "video"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/llama-3.3-nemotron-super-49b-v1.toml b/providers/nvidia/models/nvidia/llama-3.3-nemotron-super-49b-v1.toml
deleted file mode 100644
index 63c9b2713..000000000
--- a/providers/nvidia/models/nvidia/llama-3.3-nemotron-super-49b-v1.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Llama 3.3 Nemotron Super 49b V1"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = false
-structured_output = false
-release_date = "2025-03-16"
-last_updated = "2025-03-16"
-open_weights = false
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/qwen/qwen-image-edit.toml b/providers/nvidia/models/qwen/qwen-image-edit.toml
new file mode 100644
index 000000000..d5dcfb9f5
--- /dev/null
+++ b/providers/nvidia/models/qwen/qwen-image-edit.toml
@@ -0,0 +1,22 @@
+name = "Qwen Image Edit"
+family = "qwen"
+release_date = "2025-08-19"
+last_updated = "2025-08-19"
+attachment = true
+reasoning = false
+temperature = true
+tool_call = false
+structured_output = false
+open_weights = false
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 0
+output = 0
+
+[modalities]
+input = ["text", "image"]
+output = ["image"]
diff --git a/providers/nvidia/models/qwen/qwen-image.toml b/providers/nvidia/models/qwen/qwen-image.toml
new file mode 100644
index 000000000..8fb876d8e
--- /dev/null
+++ b/providers/nvidia/models/qwen/qwen-image.toml
@@ -0,0 +1,22 @@
+name = "Qwen Image"
+family = "qwen"
+release_date = "2025-08-07"
+last_updated = "2025-08-07"
+attachment = true
+reasoning = false
+temperature = true
+tool_call = false
+structured_output = false
+open_weights = false
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 0
+output = 0
+
+[modalities]
+input = ["text", "image"]
+output = ["image"]

From ab2eb51b4ebc1115b9c51a69c7b827f033674aa0 Mon Sep 17 00:00:00 2001
From: Isaac Ng <5kahoisaac@gmail.com>
Date: Sun, 3 May 2026 15:47:19 +0800
Subject: [PATCH 3/4] chore(nvidia): align Nemotron endpoint slugs

Replace stale NVIDIA Nemotron entries with the live Build catalog slugs so the local provider catalog matches current free and partner endpoints.
---
 .../llama-3.3-nemotron-super-49b-v1.5.toml    | 21 ------------------
 .../llama-3_3-nemotron-super-49b-v1.toml      | 22 +++++++++++++++++++
 .../llama-3_3-nemotron-super-49b-v1_5.toml    | 22 +++++++++++++++++++
 .../nvidia/nemotron-nano-12b-v2-vl.toml       | 22 -------------------
 4 files changed, 44 insertions(+), 43 deletions(-)
 delete mode 100644 providers/nvidia/models/nvidia/llama-3.3-nemotron-super-49b-v1.5.toml
 create mode 100644 providers/nvidia/models/nvidia/llama-3_3-nemotron-super-49b-v1.toml
 create mode 100644 providers/nvidia/models/nvidia/llama-3_3-nemotron-super-49b-v1_5.toml
 delete mode 100644 providers/nvidia/models/nvidia/nemotron-nano-12b-v2-vl.toml

diff --git a/providers/nvidia/models/nvidia/llama-3.3-nemotron-super-49b-v1.5.toml b/providers/nvidia/models/nvidia/llama-3.3-nemotron-super-49b-v1.5.toml
deleted file mode 100644
index 46b9ac8c0..000000000
--- a/providers/nvidia/models/nvidia/llama-3.3-nemotron-super-49b-v1.5.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Llama 3.3 Nemotron Super 49b V1.5"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = false
-structured_output = false
-release_date = "2025-03-16"
-last_updated = "2025-03-16"
-open_weights = false
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/nvidia/llama-3_3-nemotron-super-49b-v1.toml b/providers/nvidia/models/nvidia/llama-3_3-nemotron-super-49b-v1.toml
new file mode 100644
index 000000000..4bf5d3668
--- /dev/null
+++ b/providers/nvidia/models/nvidia/llama-3_3-nemotron-super-49b-v1.toml
@@ -0,0 +1,22 @@
+name = "Llama 3.3 Nemotron Super 49B v1"
+family = "nemotron"
+release_date = "2025-04-07"
+last_updated = "2025-04-07"
+attachment = false
+reasoning = true
+temperature = true
+knowledge = "2023-12"
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 131_072
+output = 131_072
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/llama-3_3-nemotron-super-49b-v1_5.toml b/providers/nvidia/models/nvidia/llama-3_3-nemotron-super-49b-v1_5.toml
new file mode 100644
index 000000000..215c88415
--- /dev/null
+++ b/providers/nvidia/models/nvidia/llama-3_3-nemotron-super-49b-v1_5.toml
@@ -0,0 +1,22 @@
+name = "Llama 3.3 Nemotron Super 49B v1.5"
+family = "nemotron"
+release_date = "2025-07-25"
+last_updated = "2025-07-25"
+attachment = false
+reasoning = true
+temperature = true
+knowledge = "2023-12"
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 131_072
+output = 131_072
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/nemotron-nano-12b-v2-vl.toml b/providers/nvidia/models/nvidia/nemotron-nano-12b-v2-vl.toml
deleted file mode 100644
index 65df14a01..000000000
--- a/providers/nvidia/models/nvidia/nemotron-nano-12b-v2-vl.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "Nemotron Nano 12B v2 VL"
-family = "nemotron"
-release_date = "2025-12-01"
-last_updated = "2026-04-30"
-attachment = true
-reasoning = true
-temperature = true
-knowledge = "2024-10"
-tool_call = true
-open_weights = true
-
-[cost]
-input = 0.0
-output = 0.0
-
-[limit]
-context = 128_000
-output = 16_384
-
-[modalities]
-input = ["text", "image"]
-output = ["text"]

From c5fbcc2c9b2050e176bc346ad1d73ea1b9b1bd3f Mon Sep 17 00:00:00 2001
From: Isaac Ng <5kahoisaac@gmail.com>
Date: Sun, 3 May 2026 16:17:37 +0800
Subject: [PATCH 4/4] =?UTF-8?q?=F0=9F=93=A6=20CHORE:=20remove=20serena?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .serena/.gitignore  |   2 -
 .serena/project.yml | 119 --------------------------------------------
 2 files changed, 121 deletions(-)
 delete mode 100644 .serena/.gitignore
 delete mode 100644 .serena/project.yml

diff --git a/.serena/.gitignore b/.serena/.gitignore
deleted file mode 100644
index 2e510aff5..000000000
--- a/.serena/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-/cache
-/project.local.yml
diff --git a/.serena/project.yml b/.serena/project.yml
deleted file mode 100644
index 6da0e6b8b..000000000
--- a/.serena/project.yml
+++ /dev/null
@@ -1,119 +0,0 @@
-# the name by which the project can be referenced within Serena
-project_name: "models.dev"
-
-
-# list of languages for which language servers are started; choose from:
-#   al                  ansible             bash                clojure             cpp
-#   cpp_ccls            crystal             csharp              csharp_omnisharp    dart
-#   elixir              elm                 erlang              fortran             fsharp
-#   go                  groovy              haskell             haxe                hlsl
-#   java                json                julia               kotlin              lean4
-#   lua                 luau                markdown            matlab              msl
-#   nix                 ocaml               pascal              perl                php
-#   php_phpactor        powershell          python              python_jedi         python_ty
-#   r                   rego                ruby                ruby_solargraph     rust
-#   scala               solidity            swift               systemverilog       terraform
-#   toml                typescript          typescript_vts      vue                 yaml
-#   zig
-#   (This list may be outdated. For the current list, see values of Language enum here:
-#   https://github.com/oraios/serena/blob/main/src/solidlsp/ls_config.py
-#   For some languages, there are alternative language servers, e.g. csharp_omnisharp, ruby_solargraph.)
-# Note:
-#   - For C, use cpp
-#   - For JavaScript, use typescript
-#   - For Free Pascal/Lazarus, use pascal
-# Special requirements:
-#   Some languages require additional setup/installations.
-#   See here for details: https://oraios.github.io/serena/01-about/020_programming-languages.html#language-servers
-# When using multiple languages, the first language server that supports a given file will be used for that file.
-# The first language is the default language and the respective language server will be used as a fallback.
-# Note that when using the JetBrains backend, language servers are not used and this list is correspondingly ignored.
-languages:
-- typescript
-
-# the encoding used by text files in the project
-# For a list of possible encodings, see https://docs.python.org/3.11/library/codecs.html#standard-encodings
-encoding: "utf-8"
-
-# line ending convention to use when writing source files.
-# Possible values: unset (use global setting), "lf", "crlf", or "native" (platform default)
-# This does not affect Serena's own files (e.g. memories and configuration files), which always use native line endings.
-line_ending:
-
-# The language backend to use for this project.
-# If not set, the global setting from serena_config.yml is used.
-# Valid values: LSP, JetBrains
-# Note: the backend is fixed at startup. If a project with a different backend
-# is activated post-init, an error will be returned.
-language_backend:
-
-# whether to use project's .gitignore files to ignore files
-ignore_all_files_in_gitignore: true
-
-# advanced configuration option allowing to configure language server-specific options.
-# Maps the language key to the options.
-# Have a look at the docstring of the constructors of the LS implementations within solidlsp (e.g., for C# or PHP) to see which options are available.
-# No documentation on options means no options are available.
-ls_specific_settings: {}
-
-# list of additional paths to ignore in this project.
-# Same syntax as gitignore, so you can use * and **.
-# Note: global ignored_paths from serena_config.yml are also applied additively.
-ignored_paths: []
-
-# whether the project is in read-only mode
-# If set to true, all editing tools will be disabled and attempts to use them will result in an error
-# Added on 2025-04-18
-read_only: false
-
-# list of tool names to exclude.
-# This extends the existing exclusions (e.g. from the global configuration)
-# Find the list of tools here: https://oraios.github.io/serena/01-about/035_tools.html
-excluded_tools: []
-
-# list of tools to include that would otherwise be disabled (particularly optional tools that are disabled by default).
-# This extends the existing inclusions (e.g. from the global configuration).
-# Find the list of tools here: https://oraios.github.io/serena/01-about/035_tools.html
-included_optional_tools: []
-
-# fixed set of tools to use as the base tool set (if non-empty), replacing Serena's default set of tools.
-# This cannot be combined with non-empty excluded_tools or included_optional_tools.
-# Find the list of tools here: https://oraios.github.io/serena/01-about/035_tools.html
-fixed_tools: []
-
-# list of mode names that are to be activated by default, overriding the setting in the global configuration.
-# The full set of modes to be activated is base_modes (from global config) + default_modes + added_modes.
-# If the setting is undefined/empty, the default_modes from the global configuration (serena_config.yml) apply.
-# Otherwise, this overrides the setting from the global configuration (serena_config.yml).
-# Therefore, you can set this to [] if you do not want the default modes defined in the global config to apply
-# for this project.
-# This setting can, in turn, be overridden by CLI parameters (--mode).
-# See https://oraios.github.io/serena/02-usage/050_configuration.html#modes
-default_modes:
-
-# list of mode names to be activated additionally for this project, e.g. ["query-projects"]
-# The full set of modes to be activated is base_modes (from global config) + default_modes + added_modes.
-# See https://oraios.github.io/serena/02-usage/050_configuration.html#modes
-added_modes:
-
-# initial prompt for the project. It will always be given to the LLM upon activating the project
-# (contrary to the memories, which are loaded on demand).
-initial_prompt: ""
-
-# time budget (seconds) per tool call for the retrieval of additional symbol information
-# such as docstrings or parameter information.
-# This overrides the corresponding setting in the global configuration; see the documentation there.
-# If null or missing, use the setting from the global configuration.
-symbol_info_budget:
-
-# list of regex patterns which, when matched, mark a memory entry as read‑only.
-# Extends the list from the global configuration, merging the two lists.
-read_only_memory_patterns: []
-
-# list of regex patterns for memories to completely ignore.
-# Matching memories will not appear in list_memories or activate_project output
-# and cannot be accessed via read_memory or write_memory.
-# To access ignored memory files, use the read_file tool on the raw file path.
-# Extends the list from the global configuration, merging the two lists.
-# Example: ["_archive/.*", "_episodes/.*"]
-ignored_memory_patterns: []