BYOK API

Text-to-Speech Example

This example shows how to generate speech audio from text using the BYOK bridge.

Setup

import { ByokClient } from "@byokapi/client"
import { makeAutoObservable, runInAction } from "mobx"

/**
 * MobX store that turns text into playable audio through the BYOK bridge.
 *
 * Observable state:
 * - `audioUrl`   — object URL of the most recently generated audio, or `null`.
 * - `generating` — `true` while a `speak()` request is in flight.
 */
class SpeechStore {
  client: ByokClient
  audioUrl: string | null = null
  generating = false

  constructor() {
    this.client = new ByokClient({
      bridgeUrl: "http://localhost:8881/bridge",
      appName: "TTS Demo",
    })
    makeAutoObservable(this)
  }

  /** Connects the client and requests a grant with the "speech" capability. */
  async init(): Promise<void> {
    await this.client.connect()
    await this.client.requestGrant({ capabilities: ["speech"] })
  }

  /**
   * Generates speech for `text` and publishes it as an object URL in `audioUrl`.
   *
   * @param text  Text to synthesize.
   * @param voice Voice identifier passed through to the bridge (default "alloy").
   * @throws Rethrows whatever the bridge call or base64 decoding throws;
   *         `generating` is reset to `false` before the error propagates.
   */
  async speak(text: string, voice = "alloy"): Promise<void> {
    const previousUrl = this.audioUrl
    this.generating = true
    this.audioUrl = null

    // Release the blob behind the previous URL; otherwise every call leaks
    // the audio data of the one before it.
    if (previousUrl !== null) {
      URL.revokeObjectURL(previousUrl)
    }

    try {
      const api = this.client.getBridgeAPI()
      const grantId = this.client.getActiveGrantId()

      const result = await api.doSpeech({
        grantId,
        modelId: "tts-1",
        text,
        voice,
      })

      // result.audio is a base64-encoded audio string
      const bytes = Uint8Array.from(atob(result.audio), (c) => c.charCodeAt(0))
      const blob = new Blob([bytes], { type: "audio/mp3" })
      const url = URL.createObjectURL(blob)

      // State changes after an `await` run outside the original action,
      // so they are wrapped in runInAction.
      runInAction(() => {
        this.audioUrl = url
      })
    } finally {
      // Always clear the busy flag, even when the request fails, so the UI
      // does not stay stuck in the "Generating..." state.
      runInAction(() => {
        this.generating = false
      })
    }
  }
}

React component

import { useState } from "react"
import { observer } from "mobx-react-lite"

/**
 * Text-to-speech form: a textarea for the text, a voice picker, and a button
 * that calls `store.speak`. Renders an auto-playing <audio> element once the
 * store exposes an `audioUrl`.
 */
const TTS = observer(({ store }: { store: SpeechStore }) => {
  const [text, setText] = useState("Hello, world!")
  const [voice, setVoice] = useState("alloy")
  const [error, setError] = useState<string | null>(null)

  // store.speak() returns a promise that rejects when the bridge call fails.
  // Catch it here so the failure is shown to the user instead of surfacing
  // as an unhandled promise rejection from the click handler.
  const handleSpeak = async (): Promise<void> => {
    setError(null)
    try {
      await store.speak(text, voice)
    } catch (e: unknown) {
      setError(e instanceof Error ? e.message : String(e))
    }
  }

  return (
    <div>
      <textarea value={text} onChange={(e) => setText(e.target.value)} />
      <select value={voice} onChange={(e) => setVoice(e.target.value)}>
        <option value="alloy">Alloy</option>
        <option value="echo">Echo</option>
        <option value="fable">Fable</option>
        <option value="onyx">Onyx</option>
        <option value="nova">Nova</option>
        <option value="shimmer">Shimmer</option>
      </select>
      <button onClick={() => void handleSpeak()} disabled={store.generating}>
        {store.generating ? "Generating..." : "Speak"}
      </button>
      {error !== null && <p role="alert">{error}</p>}
      {store.audioUrl && <audio src={store.audioUrl} controls autoPlay />}
    </div>
  )
})

Available voices

OpenAI TTS supports these voices: alloy, echo, fable, onyx, nova, shimmer. Each has a distinct tone and personality.

Key points

  • Speech capability — request the "speech" capability in the grant, not "language"
  • Base64 audio — the bridge returns audio as base64 since binary data can't traverse postMessage efficiently
  • Object URLs — convert the base64 to a Blob and create an object URL for the <audio> element

On this page