Files
mistral-go-sdk/audio/speech.go
vikingowl 9251672de6 feat: v1.1.0 — sync with upstream Python SDK v2.1.3
Add Connectors, Audio Speech/Voices, Audio Realtime types,
and Observability (beta). 41 new service methods, 116 total.

Breaking: ListModels and UploadFile signatures changed
(pass nil for previous behavior).
2026-03-24 09:07:03 +01:00

89 lines
2.4 KiB
Go

package audio
import (
"encoding/json"
"fmt"
)
// SpeechOutputFormat is the output audio format for speech synthesis.
type SpeechOutputFormat string
const (
SpeechFormatPCM SpeechOutputFormat = "pcm"
SpeechFormatWAV SpeechOutputFormat = "wav"
SpeechFormatMP3 SpeechOutputFormat = "mp3"
SpeechFormatFLAC SpeechOutputFormat = "flac"
SpeechFormatOpus SpeechOutputFormat = "opus"
)
// SpeechRequest represents a text-to-speech request.
type SpeechRequest struct {
Input string `json:"input"`
Model string `json:"model"`
Metadata map[string]any `json:"metadata,omitempty"`
VoiceID *string `json:"voice_id,omitempty"`
RefAudio *string `json:"ref_audio,omitempty"`
ResponseFormat *SpeechOutputFormat `json:"response_format,omitempty"`
stream bool
}
// EnableStream is used internally to enable streaming.
func (r *SpeechRequest) EnableStream() { r.stream = true }
func (r *SpeechRequest) MarshalJSON() ([]byte, error) {
type Alias SpeechRequest
return json.Marshal(&struct {
Stream bool `json:"stream"`
*Alias
}{
Stream: r.stream,
Alias: (*Alias)(r),
})
}
// SpeechResponse is the response from a non-streaming speech request.
type SpeechResponse struct {
AudioData string `json:"audio_data"`
}
// SpeechStreamEvent is a sealed interface for speech streaming events.
type SpeechStreamEvent interface {
speechStreamEvent()
}
// SpeechAudioDelta contains a chunk of audio data during streaming.
type SpeechAudioDelta struct {
Type string `json:"type"`
AudioData string `json:"audio_data"`
}
func (*SpeechAudioDelta) speechStreamEvent() {}
// SpeechDone is emitted when speech synthesis is complete.
type SpeechDone struct {
Type string `json:"type"`
Usage UsageInfo `json:"usage"`
}
func (*SpeechDone) speechStreamEvent() {}
// UnmarshalSpeechStreamEvent dispatches a raw JSON event to the correct type.
func UnmarshalSpeechStreamEvent(data []byte) (SpeechStreamEvent, error) {
var probe struct {
Type string `json:"type"`
}
if err := json.Unmarshal(data, &probe); err != nil {
return nil, err
}
switch probe.Type {
case "speech.audio.delta":
var e SpeechAudioDelta
return &e, json.Unmarshal(data, &e)
case "speech.audio.done":
var e SpeechDone
return &e, json.Unmarshal(data, &e)
default:
return nil, fmt.Errorf("unknown speech stream event type: %q", probe.Type)
}
}