Stream tokens as they are generated instead of waiting for the full response. This dramatically reduces perceived latency for chat interfaces and long outputs.
Set stream: true in your request. The API responds with a series of server-sent events (SSE). Each event contains a chat.completion.chunk with a delta object — read choices[0].delta.content and concatenate to build the full message. The stream ends with data: [DONE].
from openai import OpenAI

# Point the OpenAI SDK client at this API's endpoint via base_url.
client = OpenAI(
    api_key="sk-live-your-key",
    base_url="https://inferexapi.cloudvoice.in/v1",
)

# With stream=True the call returns an iterable of chunks rather than a
# single completed response.
stream = client.chat.completions.create(
    model="default",
    messages=[{"role": "user", "content": "Write a haiku about APIs."}],
    stream=True,
)

for chunk in stream:
    # Guard against a chunk whose `choices` list is empty; indexing [0]
    # on it would raise IndexError and abort the stream mid-message.
    # NOTE(review): whether this endpoint ever sends such chunks is not
    # shown here; the guard is harmless if it never does.
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta.content
    # Skip chunks whose delta carries no text (falsy content).
    if delta:
        # flush=True so each token appears immediately instead of being
        # held in the stdout buffer.
        print(delta, end="", flush=True)
print()  # newline at end

import OpenAI from "openai";
const client = new OpenAI({
apiKey: "sk-live-your-key",
baseURL: "https://inferexapi.cloudvoice.in/v1",
});
const stream = await client.chat.completions.create({
model: "default",
messages: [{ role: "user", content: "Write a haiku about APIs." }],
stream: true,
});
for await (const chunk of stream) {
const delta = chunk.choices[0]?.delta?.content ?? "";
process.stdout.write(delta);
}Parse SSE manually when you can't use the OpenAI SDK:
// Stream a chat completion over raw fetch and print the text as it arrives.
const response = await fetch(
  "https://inferexapi.cloudvoice.in/v1/chat/completions",
  {
    method: "POST",
    headers: {
      "Authorization": "Bearer sk-live-your-key",
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: "default",
      messages: [{ role: "user", content: "Hello!" }],
      stream: true,
    }),
  }
);

// An error response is not an event stream; fail loudly instead of
// silently printing nothing.
if (!response.ok) {
  throw new Error(`Request failed with status ${response.status}`);
}

const reader = response.body.getReader();
const decoder = new TextDecoder();

// Network chunks do not align with SSE lines, so keep the trailing partial
// line here until the rest of it arrives.
let buffer = "";
let sawDone = false;

while (!sawDone) {
  const { done, value } = await reader.read();

  // `stream: true` keeps an incomplete multi-byte UTF-8 sequence inside the
  // decoder until the next chunk; the final argument-less call flushes it.
  buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });

  const lines = buffer.split("\n");
  // The last element is an unterminated line unless the stream has ended.
  buffer = done ? "" : lines.pop();

  for (const line of lines) {
    if (!line.startsWith("data: ")) continue;
    // trim() also removes a trailing "\r" from CRLF-terminated lines.
    const data = line.slice(6).trim();
    if (data === "[DONE]") {
      // Set the flag so the outer read loop stops too, not just this `for`.
      sawDone = true;
      break;
    }
    const chunk = JSON.parse(data);
    const delta = chunk.choices[0]?.delta?.content ?? "";
    process.stdout.write(delta);
  }

  if (done) break;
}

// If we stopped at [DONE] before the body closed, release the connection.
if (sawDone) {
  await reader.cancel();
}