Overview
Different models excel at different tasks. Use multiple models to optimize for performance, cost, and quality.

Model Router Pattern
Copy
# OpenAI-compatible client routed through the SaveGate gateway via base_url.
from openai import OpenAI
client = OpenAI(
    api_key="sk-savegate-xxxxxxxxxxxxx",  # placeholder — load a real key from an env var, never commit it
    base_url="https://api.savegate.ai/v1"
)
class ModelRouter:
    """Dispatch chat requests to a model chosen by task category.

    ``MODELS`` maps a task label to the model name best suited for it;
    unknown labels fall back to the ``"fast"`` entry.
    """

    MODELS = {
        "fast": "gpt-3.5-turbo",
        "smart": "gpt-4-turbo",
        "code": "claude-3-5-sonnet-20241022",
        "long": "gemini-1.5-pro",
        "cheap": "claude-3-haiku-20240307",
    }

    def chat(self, message, task_type="fast"):
        """Send *message* to the model registered for *task_type* and return its reply text."""
        chosen = self.MODELS.get(task_type, self.MODELS["fast"])
        completion = client.chat.completions.create(
            model=chosen,
            messages=[{"role": "user", "content": message}],
        )
        return completion.choices[0].message.content
# --- Usage (each call performs a live API request) ---
router = ModelRouter()
# "fast" routes to gpt-3.5-turbo — good for simple lookups
answer = router.chat("What is 2+2?", task_type="fast")
# "smart" routes to gpt-4-turbo — complex reasoning
analysis = router.chat("Analyze the implications of quantum computing", task_type="smart")
# "code" routes to Claude — programming tasks
code = router.chat("Write a binary search function", task_type="code")
Fallback Chain
Copy
def chat_with_fallback(message, models=None):
    """Try each model in *models* in order and return the first success.

    Args:
        message: User prompt to send.
        models: Ordered list of model names to attempt. Defaults to a
            premium-first chain ending in a cheap, highly-available model.

    Returns:
        dict with keys "model" (the model that answered) and "content".

    Raises:
        RuntimeError: if every model in the chain fails, or the chain is
            empty. (RuntimeError subclasses Exception, so callers already
            catching Exception are unaffected.)
    """
    if models is None:
        models = [
            "gpt-4-turbo",
            "claude-3-5-sonnet-20241022",
            "gpt-3.5-turbo",
        ]
    last_error = None
    for model in models:
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": message}],
                timeout=30,  # seconds; prevents hanging on a slow provider
            )
            return {
                "model": model,
                "content": response.choices[0].message.content,
            }
        except Exception as e:  # any provider error triggers fallback to the next model
            last_error = e
            print(f"Model {model} failed: {e}")
    raise RuntimeError(f"All models failed. Last error: {last_error}")
# Usage: the returned dict reports which model in the chain actually answered.
result = chat_with_fallback("Tell me about AI")
print(f"Response from {result['model']}: {result['content']}")
Consensus Pattern
Get multiple model opinions and combine them:

Copy
import asyncio
from openai import AsyncOpenAI
# Async client for concurrent requests; same gateway, same placeholder key as above.
async_client = AsyncOpenAI(
    api_key="sk-savegate-xxxxxxxxxxxxx",  # placeholder — use an env var in real code
    base_url="https://api.savegate.ai/v1"
)
async def get_consensus(question, models=None):
    """Fan *question* out to several models concurrently and synthesize the replies.

    Returns a dict with "individual_responses" (one answer per model, in
    input order) and "consensus" (a GPT-4 synthesis of those answers).
    """
    if models is None:
        models = ["gpt-4", "claude-3-5-sonnet-20241022", "gemini-1.5-pro"]

    # Fire every request at once; gather preserves input order.
    pending = [
        async_client.chat.completions.create(
            model=name,
            messages=[{"role": "user", "content": question}],
        )
        for name in models
    ]
    replies = await asyncio.gather(*pending)
    answers = [reply.choices[0].message.content for reply in replies]

    # Number the answers one per line, then ask GPT-4 to reconcile them.
    numbered = chr(10).join(f'{i+1}. {ans}' for i, ans in enumerate(answers))
    synthesis_prompt = f"""
I asked multiple AI models the same question and got these answers:
{numbered}
Please provide a synthesized answer that captures the consensus or highlights differences.
"""
    synthesis = await async_client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": synthesis_prompt}],
    )
    return {
        "individual_responses": answers,
        "consensus": synthesis.choices[0].message.content,
    }
# Usage: asyncio.run drives the async fan-out from synchronous code.
result = asyncio.run(get_consensus("What are the main benefits of renewable energy?"))
print("Consensus:", result["consensus"])
Cost Optimization
Use cheaper models when possible, upgrade when needed:

Copy
class CostOptimizedChat:
    """Answer with a cheap model first; escalate to a premium model on demand.

    The cheap model is instructed to reply with the sentinel string
    'NEEDS_PREMIUM' when it judges the question too hard, which triggers
    a second call against the premium model.
    """

    def __init__(self):
        self.cheap_model = "gpt-3.5-turbo"
        self.premium_model = "gpt-4"

    def chat(self, message, force_premium=False):
        """Return {"model", "content", "cost"} for *message*.

        With force_premium=True the cheap attempt is skipped entirely.
        """
        if not force_premium:
            cheap_reply = client.chat.completions.create(
                model=self.cheap_model,
                messages=[
                    {"role": "system", "content": "If this question is too complex for you, respond with only: 'NEEDS_PREMIUM'"},
                    {"role": "user", "content": message}
                ],
                temperature=0  # deterministic output makes sentinel detection reliable
            )
            text = cheap_reply.choices[0].message.content
            # No sentinel in the reply means the cheap model handled it.
            if "NEEDS_PREMIUM" not in text:
                return {
                    "model": self.cheap_model,
                    "content": text,
                    "cost": "low"
                }

        # Either escalation was requested or the cheap model punted.
        premium_reply = client.chat.completions.create(
            model=self.premium_model,
            messages=[{"role": "user", "content": message}]
        )
        return {
            "model": self.premium_model,
            "content": premium_reply.choices[0].message.content,
            "cost": "high"
        }
# --- Usage (each call performs one or two live API requests) ---
chat = CostOptimizedChat()
# Simple question — the cheap model answers directly
result = chat.chat("What is the capital of France?")
print(f"Used {result['model']}: {result['content']}")
# Hard question — the cheap model emits the sentinel and we escalate to premium
result = chat.chat("Explain the philosophical implications of Gödel's incompleteness theorems")
print(f"Used {result['model']}: {result['content']}")
Specialized Pipeline
Use different models for different stages:

Copy
def content_generation_pipeline(topic):
    """Produce an article on *topic* via a three-stage, multi-model pipeline.

    Stage 1 brainstorms key points with a fast/cheap model, stage 2 writes
    a draft with a quality model, stage 3 edits with Claude (strong editor).
    Progress is printed before each stage.

    Args:
        topic: Subject of the article.

    Returns:
        dict with "brainstorm", "draft", and "final" text, one per stage.
    """
    # Stage 1: brainstorm with the fast model.
    print("Brainstorming...")
    brainstorm = _complete("gpt-3.5-turbo", f"Give me 5 key points about: {topic}")

    # Stage 2: write with the quality model.
    print("Writing...")
    draft = _complete("gpt-4", f"Write a detailed article using these points:\n{brainstorm}")

    # Stage 3: edit with Claude.
    print("Editing...")
    final = _complete("claude-3-5-sonnet-20241022", f"Edit and improve this article:\n{draft}")

    return {
        "brainstorm": brainstorm,
        "draft": draft,
        "final": final
    }


def _complete(model, prompt):
    """Send a single user *prompt* to *model* and return the reply text."""
    return client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    ).choices[0].message.content
# Usage: runs three sequential model calls (network required).
result = content_generation_pipeline("benefits of exercise")
print("Final article:", result["final"])
More Guides
Learn how to migrate from other providers