Skip to main content

Documentation Index

Fetch the complete documentation index at: https://docs.savegate.ai/llms.txt

Use this file to discover all available pages before exploring further.

Overview

Different models excel at different tasks. Use multiple models to optimize for performance, cost, and quality.

Model Router Pattern

from openai import OpenAI

# Gateway client: the standard OpenAI SDK pointed at the SaveGate proxy,
# which routes OpenAI-compatible requests to many different model vendors.
client = OpenAI(
    api_key="sk-savegate-xxxxxxxxxxxxx",  # placeholder — substitute your real key
    base_url="https://api.savegate.ai/v1"
)

class ModelRouter:
    """Dispatch chat requests to the model best suited for a task type.

    The class-level MODELS table maps a coarse task label to a concrete
    model name; unrecognized labels silently fall back to the "fast" entry.
    """

    MODELS = {
        "fast": "gpt-5.1-mini",
        "smart": "gpt-5.1",
        "code": "claude-sonnet-4.5",
        "reasoning": "o3",
        "cheap": "gpt-4.1-nano"
    }

    def chat(self, message, task_type="fast"):
        """Send `message` to the model mapped to `task_type`; return its reply text."""
        # Unknown task types degrade to the fast model rather than erroring.
        chosen = self.MODELS.get(task_type, self.MODELS["fast"])

        completion = client.chat.completions.create(
            model=chosen,
            messages=[{"role": "user", "content": message}]
        )

        return completion.choices[0].message.content

# Usage: one router instance serves all task types.
router = ModelRouter()

# Use fast model for simple questions
answer = router.chat("What is 2+2?", task_type="fast")

# Use smart model for complex reasoning
analysis = router.chat("Analyze the implications of quantum computing", task_type="smart")

# Use code model for programming
code = router.chat("Write a binary search function", task_type="code")

Fallback Chain

def chat_with_fallback(message, models=None):
    """Try each model in order, returning the first successful completion.

    Args:
        message: The user prompt to send.
        models: Ordered list of model names to attempt. Defaults to a
            premium-first chain.

    Returns:
        dict with "model" (the name that answered) and "content" (its reply).

    Raises:
        RuntimeError: if every model in the chain fails (or the chain is
            empty), chained from the last underlying error.
    """
    if models is None:
        models = [
            "gpt-5.1",
            "claude-sonnet-4.5",
            "gpt-4.2"
        ]

    last_error = None

    for model in models:
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": message}],
                timeout=30  # seconds; keeps one hung provider from stalling the chain
            )
            return {
                "model": model,
                "content": response.choices[0].message.content
            }
        except Exception as e:
            # Broad catch is deliberate here: any provider failure (network,
            # auth, rate limit) should advance to the next model in the chain.
            last_error = e
            print(f"Model {model} failed: {e}")
            continue

    # Chain exhausted — raise a specific exception type and preserve the
    # original cause for debugging (was a bare `Exception` before).
    raise RuntimeError(f"All models failed. Last error: {last_error}") from last_error

# Usage: the returned dict reports which model in the chain actually answered.
result = chat_with_fallback("Tell me about AI")
print(f"Response from {result['model']}: {result['content']}")

Consensus Pattern

Get multiple model opinions and combine them:
import asyncio
from openai import AsyncOpenAI

# Async variant of the gateway client, for issuing requests concurrently.
async_client = AsyncOpenAI(
    api_key="sk-savegate-xxxxxxxxxxxxx",  # placeholder — substitute your real key
    base_url="https://api.savegate.ai/v1"
)

async def get_consensus(question, models=None):
    """Ask several models the same question concurrently and synthesize the answers.

    Args:
        question: The prompt to send to every model.
        models: List of model names to poll; defaults to three diverse models.

    Returns:
        dict with "individual_responses" (list of successful answers) and
        "consensus" (a synthesized summary produced by gpt-5.1).

    Raises:
        RuntimeError: if no model produced an answer (all failed, or `models`
            was empty).
    """
    if models is None:
        models = ["gpt-5.1", "claude-sonnet-4.5", "gpt-4.2"]

    # Get all responses concurrently.
    tasks = [
        async_client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": question}]
        )
        for model in models
    ]

    # return_exceptions=True lets a single failing model degrade the consensus
    # gracefully instead of aborting every in-flight request.
    responses = await asyncio.gather(*tasks, return_exceptions=True)

    # Keep only successful completions.
    answers = [
        r.choices[0].message.content
        for r in responses
        if not isinstance(r, Exception)
    ]

    if not answers:
        raise RuntimeError("No model produced an answer to synthesize")

    # Synthesize consensus from the surviving answers.
    synthesis_prompt = f"""
    I asked multiple AI models the same question and got these answers:

    {chr(10).join(f'{i+1}. {ans}' for i, ans in enumerate(answers))}

    Please provide a synthesized answer that captures the consensus or highlights differences.
    """

    synthesis = await async_client.chat.completions.create(
        model="gpt-5.1",
        messages=[{"role": "user", "content": synthesis_prompt}]
    )

    return {
        "individual_responses": answers,
        "consensus": synthesis.choices[0].message.content
    }

# Usage: asyncio.run drives the whole fan-out/synthesis pipeline to completion.
result = asyncio.run(get_consensus("What are the main benefits of renewable energy?"))
print("Consensus:", result["consensus"])

Cost Optimization

Use cheaper models when possible, upgrade when needed:
class CostOptimizedChat:
    """Chat helper that tries a cheap model first and escalates when needed.

    The cheap model is primed with a system instruction to emit the sentinel
    string 'NEEDS_PREMIUM' when a question exceeds its ability; seeing that
    sentinel (or `force_premium=True`) routes the question to the premium model.
    """

    def __init__(self):
        # Least and most expensive models available through the gateway.
        self.cheap_model = "gpt-4.1-nano"
        self.premium_model = "gpt-5.1"

    def chat(self, message, force_premium=False):
        """Answer `message`, preferring the cheap model unless escalation is needed.

        Returns a dict with the model used, its reply, and a "low"/"high" cost tag.
        """
        if not force_premium:
            cheap_reply = self._ask_cheap(message)

            # Any reply without the sentinel is accepted as-is.
            if "NEEDS_PREMIUM" not in cheap_reply:
                return {
                    "model": self.cheap_model,
                    "content": cheap_reply,
                    "cost": "low"
                }

        # Escalation path: forced by the caller or requested by the cheap model.
        premium = client.chat.completions.create(
            model=self.premium_model,
            messages=[{"role": "user", "content": message}]
        )

        return {
            "model": self.premium_model,
            "content": premium.choices[0].message.content,
            "cost": "high"
        }

    def _ask_cheap(self, message):
        """Query the cheap model with the escalation-sentinel instruction; return its text."""
        response = client.chat.completions.create(
            model=self.cheap_model,
            messages=[
                {"role": "system", "content": "If this question is too complex for you, respond with only: 'NEEDS_PREMIUM'"},
                {"role": "user", "content": message}
            ],
            temperature=0  # deterministic triage decision
        )
        return response.choices[0].message.content

# Usage: the cheap model itself decides whether to escalate each question.
chat = CostOptimizedChat()

# Simple question - uses cheap model
result = chat.chat("What is the capital of France?")
print(f"Used {result['model']}: {result['content']}")

# Complex question - automatically upgrades
result = chat.chat("Explain the philosophical implications of Gödel's incompleteness theorems")
print(f"Used {result['model']}: {result['content']}")

Specialized Pipeline

Use different models for different stages:
def content_generation_pipeline(topic):
    """Produce an article on `topic` via a three-stage, multi-model pipeline.

    Stage 1: Brainstorm (fast model)
    Stage 2: Write (quality model)
    Stage 3: Edit (code model for structure)

    Returns a dict holding the brainstorm, the draft, and the final article.
    """

    def ask(model, prompt):
        # Single-turn completion helper shared by all three stages.
        reply = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}]
        )
        return reply.choices[0].message.content

    # Stage 1: Brainstorm with fast model
    print("Brainstorming...")
    brainstorm = ask("gpt-5.1-mini", f"Give me 5 key points about: {topic}")

    # Stage 2: Write with quality model
    print("Writing...")
    draft = ask("gpt-5.1", f"Write a detailed article using these points:\n{brainstorm}")

    # Stage 3: Edit with Claude (excellent at editing)
    print("Editing...")
    final = ask("claude-sonnet-4.5", f"Edit and improve this article:\n{draft}")

    return {
        "brainstorm": brainstorm,
        "draft": draft,
        "final": final
    }

# Usage: intermediate stages are available in the result if you want to inspect them.
result = content_generation_pipeline("benefits of exercise")
print("Final article:", result["final"])

More Guides

Learn how to migrate from other providers.