This document describes how TanStack AI transmits StreamChunks over Server-Sent Events (SSE), the recommended protocol for most use cases. SSE is a standard, HTTP-based protocol for streaming data from the server to the client over a single long-lived response.
Method: POST
Headers:
Content-Type: application/json
Body:
{
  "messages": [
    {
      "role": "user",
      "content": "Hello, how are you?"
    }
  ],
  "data": {
    // Optional additional data
  }
}
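For reference, the request body corresponds roughly to the following TypeScript shape (a sketch inferred from the example above; the exact message type exported by @tanstack/ai may include additional fields):

// Sketch of the request body shape (inferred from the example, not the library's canonical type)
interface ChatRequestBody {
  // Conversation history; role is typically 'user' or 'assistant'
  messages: Array<{
    role: string;
    content: string;
  }>;
  // Optional additional data forwarded to the server handler
  data?: Record<string, unknown>;
}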
Status: 200 OK
Headers:
Content-Type: text/event-stream
Cache-Control: no-cache
Connection: keep-alive
Body: Stream of SSE events
Each StreamChunk is transmitted as a single SSE event with the following format:
data: {JSON_ENCODED_CHUNK}\n\n
For example, a content chunk:
data: {"type":"content","id":"chatcmpl-abc123","model":"gpt-4o","timestamp":1701234567890,"delta":"Hello","content":"Hello","role":"assistant"}\n\n
A tool call chunk:
data: {"type":"tool_call","id":"chatcmpl-abc123","model":"gpt-4o","timestamp":1701234567891,"toolCall":{"id":"call_xyz","type":"function","function":{"name":"get_weather","arguments":"{\"location\":\"SF\"}"}},"index":0}\n\n
A completion chunk:
data: {"type":"done","id":"chatcmpl-abc123","model":"gpt-4o","timestamp":1701234567892,"finishReason":"stop","usage":{"promptTokens":10,"completionTokens":5,"totalTokens":15}}\n\n
// Client code
const response = await fetch('/api/chat', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({ messages }),
});
HTTP/1.1 200 OK
Content-Type: text/event-stream
Cache-Control: no-cache
Connection: keep-alive
The server sends multiple data: events as chunks are generated:
data: {"type":"content","id":"msg_1","model":"gpt-4o","timestamp":1701234567890,"delta":"The","content":"The"}\n\n
data: {"type":"content","id":"msg_1","model":"gpt-4o","timestamp":1701234567891,"delta":" weather","content":"The weather"}\n\n
data: {"type":"content","id":"msg_1","model":"gpt-4o","timestamp":1701234567892,"delta":" is","content":"The weather is"}\n\n
data: {"type":"content","id":"msg_1","model":"gpt-4o","timestamp":1701234567893,"delta":" sunny","content":"The weather is sunny"}\n\n
data: {"type":"done","id":"msg_1","model":"gpt-4o","timestamp":1701234567894,"finishReason":"stop"}\n\n
data: {"type":"content","id":"msg_1","model":"gpt-4o","timestamp":1701234567890,"delta":"The","content":"The"}\n\n
data: {"type":"content","id":"msg_1","model":"gpt-4o","timestamp":1701234567891,"delta":" weather","content":"The weather"}\n\n
data: {"type":"content","id":"msg_1","model":"gpt-4o","timestamp":1701234567892,"delta":" is","content":"The weather is"}\n\n
data: {"type":"content","id":"msg_1","model":"gpt-4o","timestamp":1701234567893,"delta":" sunny","content":"The weather is sunny"}\n\n
data: {"type":"done","id":"msg_1","model":"gpt-4o","timestamp":1701234567894,"finishReason":"stop"}\n\n
After the final chunk, the server sends a completion marker:
data: [DONE]\n\n
data: [DONE]\n\n
The server then closes the connection.
If an error occurs during generation, send an error chunk:
data: {"type":"error","id":"msg_1","model":"gpt-4o","timestamp":1701234567895,"error":{"message":"Rate limit exceeded","code":"rate_limit_exceeded"}}\n\n
data: {"type":"error","id":"msg_1","model":"gpt-4o","timestamp":1701234567895,"error":{"message":"Rate limit exceeded","code":"rate_limit_exceeded"}}\n\n
Then close the connection.
SSE provides automatic reconnection when a connection is interrupted.
TanStack AI provides toServerSentEventsStream() and toStreamResponse() utilities:
import { chat, toStreamResponse } from '@tanstack/ai';
import { openai } from '@tanstack/ai-openai';

export async function POST(request: Request) {
  const { messages } = await request.json();

  const stream = chat({
    adapter: openai(),
    messages,
    model: 'gpt-4o',
  });

  // Automatically converts StreamChunks to SSE format
  return toStreamResponse(stream);
}
What toStreamResponse() does: it serializes each StreamChunk as a data: event, appends the data: [DONE] completion marker when the stream ends, and returns a Response with the SSE headers shown above.
TanStack AI provides a fetchServerSentEvents() connection adapter:
import { useChat, fetchServerSentEvents } from '@tanstack/ai-react';

const { messages, sendMessage } = useChat({
  connection: fetchServerSentEvents('/api/chat'),
});
What fetchServerSentEvents() does: it sends the POST request described above, parses each data: event from the response stream, skips the [DONE] marker, and delivers the decoded StreamChunks to useChat().
If you need custom handling:
import { chat } from '@tanstack/ai';

export async function POST(request: Request) {
  const { messages } = await request.json();

  const encoder = new TextEncoder();
  const stream = new ReadableStream({
    async start(controller) {
      try {
        for await (const chunk of chat({ ... })) {
          // One SSE event per StreamChunk
          const sseData = `data: ${JSON.stringify(chunk)}\n\n`;
          controller.enqueue(encoder.encode(sseData));
        }
        // Completion marker, then end the stream
        controller.enqueue(encoder.encode('data: [DONE]\n\n'));
        controller.close();
      } catch (error) {
        const errorChunk = {
          type: 'error',
          error: { message: error instanceof Error ? error.message : String(error) },
        };
        controller.enqueue(encoder.encode(`data: ${JSON.stringify(errorChunk)}\n\n`));
        controller.close();
      }
    },
  });

  return new Response(stream, {
    headers: {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive',
    },
  });
}
const response = await fetch('/api/chat', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ messages }),
});

const reader = response.body!.getReader();
const decoder = new TextDecoder();
let buffer = '';

while (true) {
  const { done, value } = await reader.read();
  if (done) break;

  buffer += decoder.decode(value, { stream: true });

  // Keep any partial line in the buffer until the next read
  const lines = buffer.split('\n');
  buffer = lines.pop() || '';

  for (const line of lines) {
    if (line.startsWith('data: ')) {
      const data = line.slice(6);
      if (data === '[DONE]') continue;

      const chunk = JSON.parse(data);
      // Handle chunk...
    }
  }
}
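The // Handle chunk... placeholder above is where application logic goes. A minimal sketch that dispatches on the chunk types described in this document (handleChunk and the accumulation variables are illustrative, not part of the TanStack AI API):

// Accumulated state for one assistant message
let text = '';
const toolCalls: Array<unknown> = [];

function handleChunk(chunk: any) {
  switch (chunk.type) {
    case 'content':
      // Use the server-accumulated text (or append chunk.delta instead, as noted earlier)
      text = chunk.content;
      break;
    case 'tool_call':
      // Tool call arguments arrive as a JSON-encoded string
      toolCalls.push(chunk.toolCall);
      break;
    case 'done':
      console.log('Finished:', chunk.finishReason, chunk.usage);
      break;
    case 'error':
      console.error('Stream error:', chunk.error?.message);
      break;
  }
}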
Browser DevTools: open the Network tab, select the /api/chat request, and watch the events arrive in the response body as the stream progresses.
cURL:
curl -N -X POST http://localhost:3000/api/chat \
-H "Content-Type: application/json" \
-d '{"messages":[{"role":"user","content":"Hello"}]}'
The -N flag disables buffering to see real-time output.
Example Output:
data: {"type":"content","id":"msg_1","model":"gpt-4o","timestamp":1701234567890,"delta":"Hello","content":"Hello"}
data: {"type":"content","id":"msg_1","model":"gpt-4o","timestamp":1701234567891,"delta":" there","content":"Hello there"}
data: {"type":"done","id":"msg_1","model":"gpt-4o","timestamp":1701234567892,"finishReason":"stop"}
data: [DONE]
data: {"type":"content","id":"msg_1","model":"gpt-4o","timestamp":1701234567890,"delta":"Hello","content":"Hello"}
data: {"type":"content","id":"msg_1","model":"gpt-4o","timestamp":1701234567891,"delta":" there","content":"Hello there"}
data: {"type":"done","id":"msg_1","model":"gpt-4o","timestamp":1701234567892,"finishReason":"stop"}
data: [DONE]
