import z from "zod";
import { ELEVEN_LABS_API_BASE_URL } from "../constants";
// Zod schemas (keeping these for validation)
// Audio output formats the ElevenLabs API accepts, encoded as
// codec_sampleRateHz[_bitrateKbps]. Kept as a readonly tuple so the
// literal types flow into z.enum unchanged.
const ALLOWED_OUTPUT_FORMATS = [
  "mp3_22050_32",
  "mp3_24000_48",
  "mp3_44100_32",
  "mp3_44100_64",
  "mp3_44100_96",
  "mp3_44100_128",
  "mp3_44100_192",
  "pcm_8000",
  "pcm_16000",
  "pcm_22050",
  "pcm_24000",
  "pcm_32000",
  "pcm_44100",
  "pcm_48000",
  "ulaw_8000",
  "alaw_8000",
  "opus_48000_32",
  "opus_48000_64",
  "opus_48000_96",
  "opus_48000_128",
  "opus_48000_192",
] as const;
const AllowedOutputFormatsSchema = z.enum(ALLOWED_OUTPUT_FORMATS);
// One line of dialogue: the text to speak and the voice that speaks it.
const DialogueInputSchema = z.object({
  text: z.string(),
  voiceID: z.string(),
});

// Per-request model tuning. stability defaults to 0.5 when the field is
// absent; an explicit null is preserved (nullish = nullable + optional).
const ModelSettingsSchema = z.object({
  stability: z.number().nullish().default(0.5),
});

// Reference to a pronunciation dictionary; versionID may be omitted or null.
const PronunciationDictionaryLocatorSchema = z.object({
  pronunciationDictionaryID: z.string(),
  versionID: z.string().nullish(),
});

// Whether the API should normalize text before synthesis; defaults to "auto".
const ApplyTextNormalizationSchema = z.enum(["auto", "on", "off"]).default("auto");

// Shape of the value textToDialogue resolves with.
const TextToDialogueOutputSchema = z.object({
  audio: z.string(), // base64 encoded audio
  contentType: z.string(),
});
// Types
// Public types inferred from the zod schemas above, so the exported types
// can never drift out of sync with the runtime validators.
export type DialogueInput = z.infer<typeof DialogueInputSchema>;
export type ModelSettings = z.infer<typeof ModelSettingsSchema>;
export type PronunciationDictionaryLocator = z.infer<typeof PronunciationDictionaryLocatorSchema>;
export type AllowedOutputFormats = z.infer<typeof AllowedOutputFormatsSchema>;
export type ApplyTextNormalization = z.infer<typeof ApplyTextNormalizationSchema>;
export type TextToDialogueOutput = z.infer<typeof TextToDialogueOutputSchema>;
/** Parameters for {@link textToDialogue}. camelCase here; converted to the API's snake_case on the wire. */
export interface TextToDialogueParams {
  /** Ordered dialogue lines to synthesize. */
  inputs: DialogueInput[];
  /** Model identifier; the function falls back to "eleven_v3" when omitted. */
  modelID?: string;
  languageCode?: string | null;
  settings?: ModelSettings | null;
  pronunciationDictionaryLocators?: PronunciationDictionaryLocator[] | null;
  /** Seed for deterministic generation — TODO confirm semantics against the ElevenLabs API docs. */
  seed?: number | null;
  applyTextNormalization?: ApplyTextNormalization;
  /** Sent as the output_format query parameter, not in the JSON body. */
  outputFormat?: AllowedOutputFormats;
}
/**
 * Calls the ElevenLabs text-to-dialogue endpoint and returns the synthesized
 * audio as a base64 string plus its content type.
 *
 * @param params - Request parameters; see {@link TextToDialogueParams}.
 * @returns The audio payload, base64-encoded, with the response content type.
 * @throws Error when the HTTP response is not ok, the body is empty, or the
 *   result fails output-schema validation.
 */
export async function textToDialogue(params: TextToDialogueParams): Promise<TextToDialogueOutput> {
  const {
    inputs,
    modelID = "eleven_v3",
    languageCode,
    settings,
    pronunciationDictionaryLocators,
    seed,
    applyTextNormalization,
    outputFormat,
  } = params;

  // output_format travels as a query parameter, not in the JSON body.
  const queryParams = new URLSearchParams();
  if (outputFormat) {
    queryParams.append("output_format", outputFormat);
  }
  const query = queryParams.toString();
  const url = `${ELEVEN_LABS_API_BASE_URL}/text-to-dialogue${query ? `?${query}` : ""}`;

  // Transform camelCase to snake_case for the API. JSON.stringify drops
  // undefined-valued keys, so omitted optional params are simply not sent,
  // while explicit nulls are forwarded as null.
  const requestBody = {
    inputs: inputs.map((item) => ({
      text: item.text,
      voice_id: item.voiceID,
    })),
    model_id: modelID,
    language_code: languageCode,
    settings: settings,
    pronunciation_dictionary_locators: pronunciationDictionaryLocators?.map((locator) => ({
      pronunciation_dictionary_id: locator.pronunciationDictionaryID,
      version_id: locator.versionID,
    })),
    seed: seed,
    apply_text_normalization: applyTextNormalization,
  };

  // NOTE(review): no auth header is set here — presumably ELEVEN_LABS_API_BASE_URL
  // points at a proxy that injects the xi-api-key; confirm against the constants module.
  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify(requestBody),
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`ElevenLabs API error (${response.status}): ${errorText}`);
  }

  const arrayBuffer = await response.arrayBuffer();
  const contentType = response.headers.get("content-type") || "application/octet-stream";

  // Validate that we actually received audio data
  if (arrayBuffer.byteLength === 0) {
    throw new Error("Received empty audio response from ElevenLabs API");
  }

  // Convert audio to base64 in fixed-size chunks. The previous
  // String.fromCharCode(...wholeArray) spread passes every byte as a call
  // argument and overflows the call stack on large audio payloads.
  const bytes = new Uint8Array(arrayBuffer);
  const CHUNK_SIZE = 0x8000; // 32 KiB keeps argument counts well under engine limits
  let binary = "";
  for (let i = 0; i < bytes.length; i += CHUNK_SIZE) {
    binary += String.fromCharCode(...bytes.subarray(i, i + CHUNK_SIZE));
  }
  const base64 = btoa(binary);
  if (!base64) {
    throw new Error("Failed to convert audio to base64");
  }

  // Validate output with Zod before handing it to callers.
  return TextToDialogueOutputSchema.parse({
    audio: base64,
    contentType,
  });
}