Documentation Index
Fetch the complete documentation index at: https://docs.savegate.ai/llms.txt
Use this file to discover all available pages before exploring further.
Overview
Different models excel at different tasks. Use multiple models together to optimize for performance, cost, and quality.

Model Router Pattern
from openai import OpenAI

client = OpenAI(
    api_key="sk-savegate-xxxxxxxxxxxxx",
    base_url="https://api.savegate.ai/v1"
)

class ModelRouter:
    """Route requests to appropriate models based on task type."""

    MODELS = {
        "fast": "gpt-5.1-mini",
        "smart": "gpt-5.1",
        "code": "claude-sonnet-4.5",
        "reasoning": "o3",
        "cheap": "gpt-4.1-nano"
    }

    def chat(self, message, task_type="fast"):
        # Unknown task types fall back to the fast model
        model = self.MODELS.get(task_type, self.MODELS["fast"])
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": message}]
        )
        return response.choices[0].message.content

# Usage
router = ModelRouter()
# Use fast model for simple questions
answer = router.chat("What is 2+2?", task_type="fast")
# Use smart model for complex reasoning
analysis = router.chat("Analyze the implications of quantum computing", task_type="smart")
# Use code model for programming
code = router.chat("Write a binary search function", task_type="code")
Fallback Chain
def chat_with_fallback(message, models=None):
    """Try multiple models in order until one succeeds."""
    if models is None:
        models = [
            "gpt-5.1",
            "claude-sonnet-4.5",
            "gpt-4.2"
        ]
    last_error = None
    for model in models:
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": message}],
                timeout=30
            )
            return {
                "model": model,
                "content": response.choices[0].message.content
            }
        except Exception as e:
            # Remember the failure and move on to the next model
            last_error = e
            print(f"Model {model} failed: {e}")
            continue
    raise RuntimeError(f"All models failed. Last error: {last_error}")

# Usage
result = chat_with_fallback("Tell me about AI")
print(f"Response from {result['model']}: {result['content']}")
Consensus Pattern
Get multiple model opinions and combine them:

import asyncio
from openai import AsyncOpenAI
async_client = AsyncOpenAI(
    api_key="sk-savegate-xxxxxxxxxxxxx",
    base_url="https://api.savegate.ai/v1"
)
async def get_consensus(question, models=None):
    """Get responses from multiple models and synthesize them."""
    if models is None:
        models = ["gpt-5.1", "claude-sonnet-4.5", "gpt-4.2"]

    # Get all responses concurrently
    tasks = [
        async_client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": question}]
        )
        for model in models
    ]
    responses = await asyncio.gather(*tasks)

    # Extract answers
    answers = [r.choices[0].message.content for r in responses]

    # Synthesize consensus
    synthesis_prompt = f"""
I asked multiple AI models the same question and got these answers:

{chr(10).join(f'{i+1}. {ans}' for i, ans in enumerate(answers))}

Please provide a synthesized answer that captures the consensus or highlights differences.
"""
    synthesis = await async_client.chat.completions.create(
        model="gpt-5.1",
        messages=[{"role": "user", "content": synthesis_prompt}]
    )
    return {
        "individual_responses": answers,
        "consensus": synthesis.choices[0].message.content
    }

# Usage
result = asyncio.run(get_consensus("What are the main benefits of renewable energy?"))
print("Consensus:", result["consensus"])
Cost Optimization
Use cheaper models when possible and upgrade only when needed:

class CostOptimizedChat:
    def __init__(self):
        self.cheap_model = "gpt-4.1-nano"
        self.premium_model = "gpt-5.1"

    def chat(self, message, force_premium=False):
        # Try the cheap model first
        if not force_premium:
            response = client.chat.completions.create(
                model=self.cheap_model,
                messages=[
                    {"role": "system", "content": "If this question is too complex for you, respond with only: 'NEEDS_PREMIUM'"},
                    {"role": "user", "content": message}
                ],
                temperature=0
            )
            result = response.choices[0].message.content

            # Return the cheap answer unless the model asked for an upgrade
            if "NEEDS_PREMIUM" not in result:
                return {
                    "model": self.cheap_model,
                    "content": result,
                    "cost": "low"
                }

        # Use the premium model
        response = client.chat.completions.create(
            model=self.premium_model,
            messages=[{"role": "user", "content": message}]
        )
        return {
            "model": self.premium_model,
            "content": response.choices[0].message.content,
            "cost": "high"
        }

# Usage
chat = CostOptimizedChat()
# Simple question - uses cheap model
result = chat.chat("What is the capital of France?")
print(f"Used {result['model']}: {result['content']}")
# Complex question - automatically upgrades
result = chat.chat("Explain the philosophical implications of Gödel's incompleteness theorems")
print(f"Used {result['model']}: {result['content']}")
Specialized Pipeline
Use different models for different stages:

def content_generation_pipeline(topic):
    """
    Stage 1: Brainstorm (fast model)
    Stage 2: Write (quality model)
    Stage 3: Edit (code model for structure)
    """
    # Stage 1: Brainstorm with fast model
    print("Brainstorming...")
    brainstorm = client.chat.completions.create(
        model="gpt-5.1-mini",
        messages=[{"role": "user", "content": f"Give me 5 key points about: {topic}"}]
    ).choices[0].message.content

    # Stage 2: Write with quality model
    print("Writing...")
    draft = client.chat.completions.create(
        model="gpt-5.1",
        messages=[{"role": "user", "content": f"Write a detailed article using these points:\n{brainstorm}"}]
    ).choices[0].message.content

    # Stage 3: Edit with Claude (excellent at editing)
    print("Editing...")
    final = client.chat.completions.create(
        model="claude-sonnet-4.5",
        messages=[{"role": "user", "content": f"Edit and improve this article:\n{draft}"}]
    ).choices[0].message.content

    return {
        "brainstorm": brainstorm,
        "draft": draft,
        "final": final
    }

# Usage
result = content_generation_pipeline("benefits of exercise")
print("Final article:", result["final"])
More Guides
Learn how to migrate from other providers
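Because the gateway speaks the OpenAI API (every example above uses the official OpenAI SDK with a custom base_url), migrating from OpenAI usually comes down to changing the API key and base URL. A minimal sketch, assuming your key is stored in a SAVEGATE_API_KEY environment variable (the variable name is illustrative):

import os
from openai import OpenAI

# Before: OpenAI(api_key=os.environ["OPENAI_API_KEY"])
# After: same SDK and same calls, different key and base URL
client = OpenAI(
    api_key=os.environ["SAVEGATE_API_KEY"],  # e.g. "sk-savegate-xxxxxxxxxxxxx"
    base_url="https://api.savegate.ai/v1"
)

response = client.chat.completions.create(
    model="gpt-5.1",
    messages=[{"role": "user", "content": "Hello"}]
)
print(response.choices[0].message.content)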