Documentation Index
Fetch the complete documentation index at: https://docs.savegate.ai/llms.txt
Use this file to discover all available pages before exploring further.
Why Stream?
Streaming provides immediate feedback to users, making your application feel faster and more responsive.
Python Streaming
Basic Streaming
# SaveGate exposes an OpenAI-compatible endpoint, so the official
# OpenAI SDK works unchanged once base_url points at it.
from openai import OpenAI

client = OpenAI(
    base_url="https://api.savegate.ai/v1",
    api_key="sk-savegate-xxxxxxxxxxxxx",  # replace with your real key
)
def stream_response(message, model="gpt-4"):
    """Stream a chat completion and print each content delta to stdout."""
    stream = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": message}],
        stream=True,
    )
    print("Response: ", end="")
    for chunk in stream:
        # Some chunks (e.g. the final one) carry no content delta.
        if content := chunk.choices[0].delta.content:
            print(content, end="", flush=True)
    print()  # New line at end


# Usage
stream_response("Tell me a story about a robot")
Streaming with Callback
def stream_with_callback(message, callback, model="gpt-4"):
    """Stream response and call callback for each chunk"""
    stream = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": message}],
        stream=True,
    )
    pieces = []
    for chunk in stream:
        content = chunk.choices[0].delta.content
        if content:
            pieces.append(content)
            callback(content)
    # Join once at the end instead of concatenating per chunk.
    return "".join(pieces)


# Usage with custom callback
def print_colored(text):
    # Wrap the chunk in ANSI green, printing inline without a newline.
    print(f"\033[92m{text}\033[0m", end="", flush=True)


response = stream_with_callback("Write a poem", callback=print_colored)
Async Streaming
import asyncio

from openai import AsyncOpenAI

# Async client mirrors the sync configuration above.
async_client = AsyncOpenAI(
    base_url="https://api.savegate.ai/v1",
    api_key="sk-savegate-xxxxxxxxxxxxx",
)
async def async_stream(message, model="gpt-4"):
    """Stream a completion without blocking the event loop; return the full text."""
    stream = await async_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": message}],
        stream=True,
    )
    parts = []
    async for chunk in stream:
        delta = chunk.choices[0].delta.content
        if delta:
            print(delta, end="", flush=True)
            parts.append(delta)
    return "".join(parts)


# Usage
asyncio.run(async_stream("Explain quantum computing"))
Node.js Streaming
Basic Streaming
// SaveGate is OpenAI-compatible: the stock SDK only needs a custom baseURL.
import OpenAI from 'openai';

const client = new OpenAI({
  baseURL: 'https://api.savegate.ai/v1',
  apiKey: 'sk-savegate-xxxxxxxxxxxxx'
});
// Stream a completion and echo each content delta to stdout as it arrives.
async function streamResponse(message, model = 'gpt-4') {
  const stream = await client.chat.completions.create({
    model,
    messages: [{ role: 'user', content: message }],
    stream: true
  });
  process.stdout.write('Response: ');
  for await (const chunk of stream) {
    // Missing deltas (e.g. the terminal chunk) fall back to ''.
    process.stdout.write(chunk.choices[0]?.delta?.content || '');
  }
  console.log();
}

// Usage
streamResponse('Tell me a story about a robot');
Streaming with Event Emitter
import { EventEmitter } from 'events';
// Returns an EventEmitter that fires 'chunk' for each content delta,
// 'done' with the full text, and 'error' if the stream fails mid-flight.
async function streamWithEvents(message, model = 'gpt-4') {
  const emitter = new EventEmitter();
  const stream = await client.chat.completions.create({
    model: model,
    messages: [{ role: 'user', content: message }],
    stream: true
  });
  let fullResponse = '';
  (async () => {
    try {
      for await (const chunk of stream) {
        const content = chunk.choices[0]?.delta?.content || '';
        if (content) {
          fullResponse += content;
          emitter.emit('chunk', content);
        }
      }
      emitter.emit('done', fullResponse);
    } catch (err) {
      // Without this, a failure inside the detached async IIFE becomes an
      // unhandled promise rejection and 'done' never fires for listeners.
      emitter.emit('error', err);
    }
  })();
  return emitter;
}
// Usage (top-level await requires an ES-module context)
const stream = await streamWithEvents('Write a poem');
// .on() returns the emitter, so the handlers can be chained.
stream
  .on('chunk', (chunk) => process.stdout.write(chunk))
  .on('done', (fullResponse) => {
    console.log('\n\nFull response received!');
  });
Web Streaming with Server-Sent Events
FastAPI Backend
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from openai import OpenAI
import json
app = FastAPI()

# Same OpenAI-compatible client configuration as the CLI examples above.
client = OpenAI(
    base_url="https://api.savegate.ai/v1",
    api_key="sk-savegate-xxxxxxxxxxxxx",
)
async def stream_generator(message: str):
    """Yield one Server-Sent-Event frame per content delta.

    NOTE(review): this uses the *sync* OpenAI client inside an async
    generator, so each network read blocks the event loop; consider the
    AsyncOpenAI client shown earlier for production use.
    """
    stream = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": message}],
        stream=True,
    )
    for chunk in stream:
        if chunk.choices[0].delta.content:
            data = json.dumps({"content": chunk.choices[0].delta.content})
            yield f"data: {data}\n\n"
    # Terminal sentinel: keeps this backend consistent with the Express
    # backend and the frontend, which both use a [DONE] event.
    yield "data: [DONE]\n\n"
@app.post("/stream")
async def stream_chat(message: str):
    """POST /stream — relay the model's output as text/event-stream."""
    body = stream_generator(message)
    return StreamingResponse(body, media_type="text/event-stream")
Express.js Backend
import express from 'express';
import OpenAI from 'openai';
const app = express();

const client = new OpenAI({
  baseURL: 'https://api.savegate.ai/v1',
  apiKey: 'sk-savegate-xxxxxxxxxxxxx'
});

// Parse JSON request bodies ({ message: "..." }).
app.use(express.json());
// POST /stream — relay model output to the client as Server-Sent Events.
app.post('/stream', async (req, res) => {
  res.setHeader('Content-Type', 'text/event-stream');
  res.setHeader('Cache-Control', 'no-cache');
  res.setHeader('Connection', 'keep-alive');
  try {
    const stream = await client.chat.completions.create({
      model: 'gpt-4',
      messages: [{ role: 'user', content: req.body.message }],
      stream: true
    });
    for await (const chunk of stream) {
      const content = chunk.choices[0]?.delta?.content || '';
      if (content) {
        res.write(`data: ${JSON.stringify({ content })}\n\n`);
      }
    }
  } catch (err) {
    // Headers are already sent, so the status code can't change; surface
    // the failure as an SSE payload instead of leaving the client hanging.
    res.write(`data: ${JSON.stringify({ error: String(err) })}\n\n`);
  }
  res.write('data: [DONE]\n\n');
  res.end();
});

app.listen(3000, () => console.log('Server running on port 3000'));
Frontend (HTML + JavaScript)
<!DOCTYPE html>
<html>
<head>
<title>Streaming Chat</title>
</head>
<body>
<div id="chat-container">
<div id="messages"></div>
<input type="text" id="user-input" placeholder="Type a message...">
<button onclick="sendMessage()">Send</button>
</div>
<script>
// Send the user's message and stream the assistant's reply into the page.
async function sendMessage() {
  const input = document.getElementById('user-input');
  const message = input.value;
  input.value = '';
  // Add user message
  addMessage('User', message);
  // Create message div for assistant
  const assistantDiv = addMessage('Assistant', '');
  // Start streaming
  const response = await fetch('/stream', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ message })
  });
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';
  let finished = false;
  while (!finished) {
    const { value, done } = await reader.read();
    if (done) break;
    // stream:true keeps multi-byte characters intact across read() calls.
    buffer += decoder.decode(value, { stream: true });
    // An SSE event may be split across reads; only consume complete lines
    // and carry the trailing partial line into the next iteration.
    const lines = buffer.split('\n');
    buffer = lines.pop();
    for (const line of lines) {
      if (!line.startsWith('data: ')) continue;
      const data = line.slice(6);
      if (data === '[DONE]') {
        // A bare `break` only exited the inner loop; flag the outer loop too.
        finished = true;
        break;
      }
      try {
        const parsed = JSON.parse(data);
        assistantDiv.textContent += parsed.content;
      } catch (e) {
        console.error('Parse error:', e);
      }
    }
  }
}
// Append a "<sender>: <content>" row to #messages and return the content span.
function addMessage(sender, content) {
  // Build the DOM with textContent rather than innerHTML: `content` comes
  // from user input / the model, so interpolating it into HTML is an XSS hole.
  const messagesDiv = document.getElementById('messages');
  const messageDiv = document.createElement('div');
  const label = document.createElement('strong');
  label.textContent = `${sender}:`;
  const span = document.createElement('span');
  span.textContent = content;
  messageDiv.appendChild(label);
  messageDiv.appendChild(document.createTextNode(' '));
  messageDiv.appendChild(span);
  messagesDiv.appendChild(messageDiv);
  return span;
}
</script>
</body>
</html>
Error Handling in Streams
def safe_stream(message, model="gpt-4"):
    """Stream a completion to stdout with error handling.

    Returns the accumulated response text on success, or None if the
    stream failed — matching the return convention of the other helpers
    in this guide (previously this function always returned None).
    """
    try:
        stream = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": message}],
            stream=True,
        )
        parts = []
        for chunk in stream:
            try:
                content = chunk.choices[0].delta.content
                if content:
                    print(content, end="", flush=True)
                    parts.append(content)
            except (AttributeError, IndexError):
                # Skip malformed chunks rather than aborting the whole stream.
                continue
    except Exception as e:
        print(f"\nError during streaming: {e}")
        return None
    print()  # New line at end
    return "".join(parts)
Next Steps
Learn how to use function calling for tool integration