Streaming
Get AI responses token-by-token for a real-time experience. Both the Chat Completions and Responses APIs support streaming.
Chat Completions Streaming
Set stream: true in your Chat Completions request to receive server-sent events (SSE) as the model generates tokens.
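Under the hood, each SSE event is a `data:` line carrying a JSON chunk, and the stream ends with a `data: [DONE]` sentinel. As an illustration only (this is a minimal hand-rolled parser over canned lines, not part of any SDK — useful mainly if you consume the stream without a client library):

```python
import json

def parse_sse_lines(lines):
    """Parse OpenAI-style SSE lines into chat-completion chunk dicts.

    Stops at the "[DONE]" sentinel that terminates the stream.
    """
    for line in lines:
        line = line.strip()
        if not line.startswith("data:"):
            continue  # skip blank keep-alive lines and SSE comments
        payload = line[len("data:"):].strip()
        if payload == "[DONE]":
            break
        yield json.loads(payload)

# Simulated wire data, shaped like the server's SSE events:
sample = [
    'data: {"choices": [{"delta": {"content": "Hello"}}]}',
    'data: {"choices": [{"delta": {"content": " world"}}]}',
    "data: [DONE]",
]
text = "".join(
    chunk["choices"][0]["delta"].get("content", "")
    for chunk in parse_sse_lines(sample)
)
print(text)  # Hello world
```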
cURL

```bash
curl https://mume.ai/api/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $MUME_API_KEY" \
  -d '{
    "model": "openai/gpt-4.1-mini",
    "messages": [
      {"role": "user", "content": "Write a poem about the moon."}
    ],
    "stream": true
  }'
```
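Assuming mume.ai follows the standard OpenAI-compatible SSE wire format, the raw response looks roughly like this (fields abbreviated; exact metadata may differ):

```
data: {"object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Silver"},"finish_reason":null}]}

data: {"object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" light"},"finish_reason":null}]}

data: {"object":"chat.completion.chunk","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}

data: [DONE]
```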
Python
```python
import openai

client = openai.OpenAI(
    api_key="your-api-key",
    base_url="https://mume.ai/api/v1",
)

response = client.chat.completions.create(
    model="openai/gpt-4.1-mini",
    messages=[{"role": "user", "content": "Write a poem about the moon."}],
    stream=True,
)

for chunk in response:
    # Each chunk carries an incremental delta; content may be None on
    # role-only or final chunks.
    content = getattr(chunk.choices[0].delta, "content", None)
    if content is not None:
        print(content, end="", flush=True)
```
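You will often want the final text as well as the live tokens. A sketch of the accumulation pattern, using SimpleNamespace stand-ins for the SDK's chunk objects so it runs without a network call (real code would pass the `response` iterator from the example above):

```python
from types import SimpleNamespace

def collect_stream(chunks):
    """Print streamed deltas live and accumulate them into the final text."""
    parts = []
    for chunk in chunks:
        content = getattr(chunk.choices[0].delta, "content", None)
        if content is not None:
            print(content, end="", flush=True)
            parts.append(content)
    return "".join(parts)

# Stand-ins shaped like the SDK's chunk objects, for illustration only:
def fake_chunk(text):
    return SimpleNamespace(
        choices=[SimpleNamespace(delta=SimpleNamespace(content=text))]
    )

full = collect_stream([fake_chunk("The moon "), fake_chunk("glows.")])
print()  # final newline after the streamed text
```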
JavaScript
```javascript
import OpenAI from "openai";

const client = new OpenAI({
  apiKey: "your-api-key",
  baseURL: "https://mume.ai/api/v1",
});

const stream = await client.chat.completions.create({
  model: "openai/gpt-4.1-mini",
  messages: [{role: "user", content: "Write a poem about the moon."}],
  stream: true,
});

for await (const chunk of stream) {
  const content = chunk.choices[0]?.delta?.content;
  if (content) {
    process.stdout.write(content);
  }
}
```

Responses API Streaming
The Responses API uses a different streaming format: instead of raw chunks, it emits typed events. Listen for response.output_text.delta events to receive the generated text.
Python

```python
response = client.responses.create(
    model="openai/gpt-4.1-nano",
    input=[
        {"type": "message", "content": "Write a poem about the moon.", "role": "user"}
    ],
    stream=True,
)

for chunk in response:
    if chunk.type == "response.output_text.delta":
        print(chunk.delta, end="", flush=True)
```
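Text deltas are not the only events on a Responses stream: lifecycle events such as response.created and response.completed arrive around them. A sketch of a small dispatcher, using stand-in event objects so it runs offline (with the real API you would iterate the stream returned by client.responses.create):

```python
from types import SimpleNamespace

def handle_events(events):
    """Route Responses-API stream events by type; returns the streamed text."""
    parts = []
    for event in events:
        if event.type == "response.output_text.delta":
            parts.append(event.delta)
        elif event.type == "response.completed":
            break  # final event of the stream
    return "".join(parts)

# Stand-in events, for illustration only:
events = [
    SimpleNamespace(type="response.created"),
    SimpleNamespace(type="response.output_text.delta", delta="Moon"),
    SimpleNamespace(type="response.output_text.delta", delta="light"),
    SimpleNamespace(type="response.completed"),
]
print(handle_events(events))  # Moonlight
```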
JavaScript
```javascript
const stream = await client.responses.create({
  model: "openai/gpt-4.1-nano",
  input: [
    {type: "message", content: "Write a poem about the moon.", role: "user"},
  ],
  stream: true,
});

for await (const chunk of stream) {
  if (chunk.type === "response.output_text.delta") {
    process.stdout.write(chunk.delta);
  }
}
```

Async Streaming (Python)
For high-performance applications, use the async client with asyncio:
Chat Completions
Python
```python
import openai
import asyncio

async_client = openai.AsyncOpenAI(
    api_key="your-api-key",
    base_url="https://mume.ai/api/v1",
)

async def main():
    async with async_client.chat.completions.stream(
        model="openai/gpt-4.1-mini",
        messages=[{"role": "user", "content": "Write a poem about the moon."}],
    ) as stream:
        async for chunk in stream:
            if chunk.type == "content.delta":
                print(chunk.delta, end="", flush=True)

asyncio.run(main())
```

Responses API
Python
```python
async def main():
    async with async_client.responses.stream(
        model="openai/gpt-4.1-mini",
        input="Write a poem about the moon.",
    ) as stream:
        async for chunk in stream:
            if chunk.type == "response.output_text.delta":
                print(chunk.delta, end="", flush=True)

asyncio.run(main())
```
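The payoff of the async client is overlapping several streams at once, e.g. with asyncio.gather. A sketch of the fan-out pattern, with a local async generator standing in for the API stream so it runs offline (real code would open each stream via async_client.chat.completions.stream as above):

```python
import asyncio

async def fake_stream(prompt):
    """Stand-in for an API stream: yields tokens with simulated latency."""
    for token in prompt.split():
        await asyncio.sleep(0.01)
        yield token

async def consume(name, stream):
    # Gather all tokens from one stream into its final text.
    tokens = [tok async for tok in stream]
    return name, " ".join(tokens)

async def main():
    # Both streams are consumed concurrently; with real API calls, the
    # network waits of one stream overlap with the other's.
    results = await asyncio.gather(
        consume("a", fake_stream("poem about the moon")),
        consume("b", fake_stream("poem about the sun")),
    )
    return dict(results)

results = asyncio.run(main())
print(results["a"])  # poem about the moon
```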