Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ We support all major LLM providers and models for inference. You need to set the
|----------|-------------------------------|------------------|
| OptiLLM | `OPTILLM_API_KEY` | Uses the inbuilt local server for inference, supports logprobs and decoding techniques like `cot_decoding` & `entropy_decoding` |
| OpenAI | `OPENAI_API_KEY` | You can use this with any OpenAI compatible endpoint (e.g. OpenRouter) by setting the `base_url` |
| MiniMax | `MINIMAX_API_KEY` | Uses the [MiniMax API](https://www.minimax.io/) (OpenAI-compatible). Supports MiniMax-M2.7 and other models. Temperature is auto-clamped to (0, 1] |
| Cerebras | `CEREBRAS_API_KEY` | You can use this for fast inference with supported models, see [docs for details](https://inference-docs.cerebras.ai/introduction) |
| Azure OpenAI | `AZURE_OPENAI_API_KEY`<br>`AZURE_API_VERSION`<br>`AZURE_API_BASE` | - |
| Azure OpenAI (Managed Identity) | `AZURE_API_VERSION`<br>`AZURE_API_BASE` | Login required using `az login`, see [docs for details](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/managed-identity) |
Expand Down
16 changes: 16 additions & 0 deletions optillm/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,13 @@ def get_config():
default_client = Cerebras(api_key=API_KEY, base_url=base_url, http_client=http_client)
else:
default_client = Cerebras(api_key=API_KEY, http_client=http_client)
elif os.environ.get("MINIMAX_API_KEY"):
API_KEY = os.environ.get("MINIMAX_API_KEY")
base_url = server_config['base_url']
if base_url == "":
base_url = "https://api.minimax.io/v1"
default_client = OpenAI(api_key=API_KEY, base_url=base_url, http_client=http_client)
logger.info(f"Created MiniMax client with base_url: {base_url}")
elif os.environ.get("OPENAI_API_KEY"):
API_KEY = os.environ.get("OPENAI_API_KEY")
base_url = server_config['base_url']
Expand Down Expand Up @@ -759,6 +766,15 @@ def proxy():
base_url = server_config['base_url']
default_client, api_key = get_config()

# Clamp temperature for MiniMax provider: must be in (0.0, 1.0]
if os.environ.get("MINIMAX_API_KEY") and 'temperature' in request_config:
temp = request_config['temperature']
if temp is not None:
if temp <= 0:
request_config['temperature'] = 0.01
elif temp > 1.0:
request_config['temperature'] = 1.0

operation, approaches, model = parse_combined_approach(model, known_approaches, plugin_approaches)

# Start conversation logging if enabled
Expand Down
87 changes: 87 additions & 0 deletions tests/test_minimax_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
"""
Integration tests for MiniMax provider support in optillm.

These tests verify that the MiniMax provider works end-to-end with the actual
MiniMax API. They require a valid MINIMAX_API_KEY environment variable.

Run with: MINIMAX_API_KEY=your-key pytest tests/test_minimax_integration.py -v
"""

import unittest
import os
import sys

# Add parent directory to path to import optillm modules
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Skip all tests if MINIMAX_API_KEY is not set
MINIMAX_API_KEY = os.environ.get('MINIMAX_API_KEY', '')
SKIP_REASON = "MINIMAX_API_KEY environment variable not set"


@unittest.skipUnless(MINIMAX_API_KEY, SKIP_REASON)
class TestMiniMaxIntegration(unittest.TestCase):
    """Integration tests that call the actual MiniMax API.

    The whole class is skipped unless MINIMAX_API_KEY is set, so the
    suite stays green in environments without credentials.
    """

    # Model under test; keep in sync with the README's MiniMax row.
    MODEL = "MiniMax-M2.7"

    def setUp(self):
        """Set up an OpenAI-compatible client pointing at the MiniMax endpoint."""
        # Imported lazily so collecting the tests does not require the
        # openai package when the class is skipped.
        from openai import OpenAI
        self.client = OpenAI(
            api_key=MINIMAX_API_KEY,
            base_url="https://api.minimax.io/v1",
        )

    def test_basic_completion(self):
        """A basic chat completion returns at least one non-empty message."""
        response = self.client.chat.completions.create(
            model=self.MODEL,
            messages=[
                {"role": "user", "content": "Say hello in one word."}
            ],
            max_tokens=10,
            temperature=0.7,
        )

        # unittest assertions give informative failure messages and are
        # not stripped under `python -O`, unlike bare `assert`.
        self.assertTrue(hasattr(response, 'choices'))
        self.assertGreater(len(response.choices), 0)
        content = response.choices[0].message.content
        self.assertIsNotNone(content)
        self.assertGreater(len(content), 0)

    def test_temperature_boundary(self):
        """The API accepts temperature at the clamp floor value 0.01."""
        response = self.client.chat.completions.create(
            model=self.MODEL,
            messages=[
                {"role": "user", "content": "What is 2+2?"}
            ],
            max_tokens=10,
            temperature=0.01,
        )

        self.assertTrue(hasattr(response, 'choices'))
        self.assertGreater(len(response.choices), 0)

    def test_streaming_completion(self):
        """A streaming completion yields at least one non-empty content delta."""
        stream = self.client.chat.completions.create(
            model=self.MODEL,
            messages=[
                {"role": "user", "content": "Count from 1 to 3."}
            ],
            max_tokens=30,
            temperature=0.5,
            stream=True,
        )

        chunks = list(stream)
        self.assertGreater(len(chunks), 0)
        # Some streamed chunks may carry an empty `choices` list (e.g. a
        # trailing usage/metadata chunk), so guard before indexing [0].
        content_chunks = [
            chunk.choices[0].delta.content
            for chunk in chunks
            if chunk.choices and chunk.choices[0].delta.content
        ]
        self.assertGreater(len(content_chunks), 0)


if __name__ == '__main__':
unittest.main()
Loading
Loading