
text-to-audio
Idle
{ "audio": "https://d1q70pf5vjeyhc.cloudfront.net/predictions/1298dfeb67a04c7fa1bac264734c41a9/1.mp3", "alignment": { "characters": [ "W", "e", "l", "c", "o", "m", "e", " ", "t", "o", " ", "o", "u", "r", " ", "a", "d", "v", "a", "n", "c", "e", "d", " ", "t", "e", "x", "t", "-", "t", "o", "-", "s", "p", "e", "e", "c", "h", " ", "s", "y", "s", "t", "e", "m", "!", " ", "E", "x", "p", "e", "r", "i", "e", "n", "c", "e", " ", "h", "i", "g", "h", "-", "q", "u", "a", "l", "i", "t", "y", " ", "v", "o", "i", "c", "e", " ", "s", "y", "n", "t", "h", "e", "s", "i", "s", " ", "w", "i", "t", "h", " ", "n", "a", "t", "u", "r", "a", "l", " ", "p", "r", "o", "n", "u", "n", "c", "i", "a", "t", "i", "o", "n", " ", "a", "n", "d", " ", "c", "l", "e", "a", "r", " ", "a", "r", "t", "i", "c", "u", "l", "a", "t", "i", "o", "n", "." ], "character_end_times_seconds": [ 0.045, 0.09, 0.135, 0.18, 0.225, 0.27, 0.315, 0.368, 0.421, 0.474, 0.514, 0.554, 0.594, 0.634, 0.705, 0.776, 0.847, 0.918, 0.989, 1.06, 1.131, 1.202, 1.273, 1.369, 1.465, 1.561, 1.657, 1.753, 1.806, 1.859, 1.912, 1.952, 1.992, 2.056, 2.12, 2.184, 2.248, 2.312, 2.38, 2.448, 2.516, 2.584, 2.652, 2.72, 2.788, 2.868, 2.962, 3.056, 3.15, 3.244, 3.338, 3.432, 3.526, 3.62, 3.714, 3.808, 3.902, 3.95, 3.998, 4.046, 4.094, 4.142, 4.192, 4.242, 4.292, 4.342, 4.392, 4.442, 4.492, 4.542, 4.595, 4.648, 4.701, 4.754, 4.807, 4.86, 4.932, 5.004, 5.076, 5.148, 5.22, 5.292, 5.364, 5.436, 5.508, 5.58, 5.628, 5.676, 5.724, 5.772, 5.82, 5.87, 5.92, 5.97, 6.02, 6.07, 6.12, 6.17, 6.22, 6.288, 6.356, 6.424, 6.492, 6.56, 6.628, 6.696, 6.764, 6.832, 6.9, 6.968, 7.036, 7.104, 7.172, 7.272, 7.372, 7.472, 7.572, 7.625, 7.678, 7.731, 7.784, 7.837, 7.89, 7.97, 8.05, 8.13, 8.21, 8.29, 8.37, 8.45, 8.53, 8.61, 8.69, 8.77, 8.85, 8.93, 9.09 ], "character_start_times_seconds": [ 0, 0.045, 0.09, 0.135, 0.18, 0.225, 0.27, 0.315, 0.368, 0.421, 0.474, 0.514, 0.554, 0.594, 0.634, 0.705, 0.776, 0.847, 0.918, 0.989, 1.06, 1.131, 1.202, 1.273, 1.369, 1.465, 1.561, 1.657, 
1.753, 1.806, 1.859, 1.912, 1.952, 1.992, 2.056, 2.12, 2.184, 2.248, 2.312, 2.38, 2.448, 2.516, 2.584, 2.652, 2.72, 2.788, 2.868, 2.962, 3.056, 3.15, 3.244, 3.338, 3.432, 3.526, 3.62, 3.714, 3.808, 3.902, 3.95, 3.998, 4.046, 4.094, 4.142, 4.192, 4.242, 4.292, 4.342, 4.392, 4.442, 4.492, 4.542, 4.595, 4.648, 4.701, 4.754, 4.807, 4.86, 4.932, 5.004, 5.076, 5.148, 5.22, 5.292, 5.364, 5.436, 5.508, 5.58, 5.628, 5.676, 5.724, 5.772, 5.82, 5.87, 5.92, 5.97, 6.02, 6.07, 6.12, 6.17, 6.22, 6.288, 6.356, 6.424, 6.492, 6.56, 6.628, 6.696, 6.764, 6.832, 6.9, 6.968, 7.036, 7.104, 7.172, 7.272, 7.372, 7.472, 7.572, 7.625, 7.678, 7.731, 7.784, 7.837, 7.89, 7.97, 8.05, 8.13, 8.21, 8.29, 8.37, 8.45, 8.53, 8.61, 8.69, 8.77, 8.85, 8.93 ] }, "normalized_alignment": { "characters": [ "W", "e", "l", "c", "o", "m", "e", " ", "t", "o", " ", "o", "u", "r", " ", "a", "d", "v", "a", "n", "c", "e", "d", " ", "t", "e", "x", "t", "-", "t", "o", "-", "s", "p", "e", "e", "c", "h", " ", "s", "y", "s", "t", "e", "m", "!", " ", "E", "x", "p", "e", "r", "i", "e", "n", "c", "e", " ", "h", "i", "g", "h", "-", "q", "u", "a", "l", "i", "t", "y", " ", "v", "o", "i", "c", "e", " ", "s", "y", "n", "t", "h", "e", "s", "i", "s", " ", "w", "i", "t", "h", " ", "n", "a", "t", "u", "r", "a", "l", " ", "p", "r", "o", "n", "u", "n", "c", "i", "a", "t", "i", "o", "n", " ", "a", "n", "d", " ", "c", "l", "e", "a", "r", " ", "a", "r", "t", "i", "c", "u", "l", "a", "t", "i", "o", "n", "." 
], "character_end_times_seconds": [ 0.045, 0.09, 0.135, 0.18, 0.225, 0.27, 0.315, 0.368, 0.421, 0.474, 0.514, 0.554, 0.594, 0.634, 0.705, 0.776, 0.847, 0.918, 0.989, 1.06, 1.131, 1.202, 1.273, 1.369, 1.465, 1.561, 1.657, 1.753, 1.806, 1.859, 1.912, 1.952, 1.992, 2.056, 2.12, 2.184, 2.248, 2.312, 2.38, 2.448, 2.516, 2.584, 2.652, 2.72, 2.788, 2.868, 2.962, 3.056, 3.15, 3.244, 3.338, 3.432, 3.526, 3.62, 3.714, 3.808, 3.902, 3.95, 3.998, 4.046, 4.094, 4.142, 4.192, 4.242, 4.292, 4.342, 4.392, 4.442, 4.492, 4.542, 4.595, 4.648, 4.701, 4.754, 4.807, 4.86, 4.932, 5.004, 5.076, 5.148, 5.22, 5.292, 5.364, 5.436, 5.508, 5.58, 5.628, 5.676, 5.724, 5.772, 5.82, 5.87, 5.92, 5.97, 6.02, 6.07, 6.12, 6.17, 6.22, 6.288, 6.356, 6.424, 6.492, 6.56, 6.628, 6.696, 6.764, 6.832, 6.9, 6.968, 7.036, 7.104, 7.172, 7.272, 7.372, 7.472, 7.572, 7.625, 7.678, 7.731, 7.784, 7.837, 7.89, 7.97, 8.05, 8.13, 8.21, 8.29, 8.37, 8.45, 8.53, 8.61, 8.69, 8.77, 8.85, 8.93, 9.09 ], "character_start_times_seconds": [ 0, 0.045, 0.09, 0.135, 0.18, 0.225, 0.27, 0.315, 0.368, 0.421, 0.474, 0.514, 0.554, 0.594, 0.634, 0.705, 0.776, 0.847, 0.918, 0.989, 1.06, 1.131, 1.202, 1.273, 1.369, 1.465, 1.561, 1.657, 1.753, 1.806, 1.859, 1.912, 1.952, 1.992, 2.056, 2.12, 2.184, 2.248, 2.312, 2.38, 2.448, 2.516, 2.584, 2.652, 2.72, 2.788, 2.868, 2.962, 3.056, 3.15, 3.244, 3.338, 3.432, 3.526, 3.62, 3.714, 3.808, 3.902, 3.95, 3.998, 4.046, 4.094, 4.142, 4.192, 4.242, 4.292, 4.342, 4.392, 4.442, 4.492, 4.542, 4.595, 4.648, 4.701, 4.754, 4.807, 4.86, 4.932, 5.004, 5.076, 5.148, 5.22, 5.292, 5.364, 5.436, 5.508, 5.58, 5.628, 5.676, 5.724, 5.772, 5.82, 5.87, 5.92, 5.97, 6.02, 6.07, 6.12, 6.17, 6.22, 6.288, 6.356, 6.424, 6.492, 6.56, 6.628, 6.696, 6.764, 6.832, 6.9, 6.968, 7.036, 7.104, 7.172, 7.272, 7.372, 7.472, 7.572, 7.625, 7.678, 7.731, 7.784, 7.837, 7.89, 7.97, 8.05, 8.13, 8.21, 8.29, 8.37, 8.45, 8.53, 8.61, 8.69, 8.77, 8.85, 8.93 ] } }
Your request will cost $0.10 per run.
For $1 you can run this model approximately 10 times.
Eleven V3 (Alignment) turns text into natural speech and, at the same time, returns precise timing data for every character and word. You get an audio file plus alignment metadata, so you can drop the voice straight onto a timeline for subtitles, karaoke, lip-sync and fine-grained editing.
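Compared with a normal TTS model (audio only), this version also outputs an alignment object containing, for example:
characters – the spoken text split into individual characters.
character_start_times_seconds – the time, in seconds, at which each character starts in the audio.
character_end_times_seconds – the time, in seconds, at which each character ends in the audio.
The three arrays have one entry per character, in the same order, so the text and audio are tightly locked to each other.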
text (required) – Script to be spoken. Recommended up to 5,000 characters per call.
voice_id (required) – Which Eleven voice to use (for example: Alice, Elli, George).
similarity (0–1) – How closely the output should match the base voice’s timbre and style.
stability (0–1) – Higher values give more consistent delivery; lower values allow more expressive variation.
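use_speaker_boost (bool) – Boosts the output's similarity to the original speaker's voice, at the cost of slightly higher latency. (Text normalisation of numbers, dates and measurements is a separate ElevenLabs option, not part of speaker boost.)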
Each run returns:
audio – URL of the generated audio file (MP3).
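alignment – JSON metadata including:
characters – array of the individual characters of the text.
character_start_times_seconds – array of start times in seconds, one per character.
character_end_times_seconds – array of end times in seconds, one per character.
normalized_alignment – a second object with the same three fields (identical to alignment in the sample response above).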
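You can parse this metadata to:
Generate subtitles or captions timed to the audio.
Highlight text in sync with the voice, karaoke-style.
Drive lip-sync animation.
Make fine-grained edits at exact character boundaries.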
Billing is based on the length of the input text.
Anything below 1,000 characters is still billed as one full 1,000-character block.