client.apiStatus.get() -> Cartesia.ApiInfo
-
-
-
await client.apiStatus.get();
-
-
-
requestOptions:
ApiStatus.RequestOptions
-
-
client.datasets.list() -> Cartesia.PaginatedDatasets
-
-
-
await client.datasets.list();
-
-
-
requestOptions:
Datasets.RequestOptions
-
-
client.datasets.create({ ...params }) -> Cartesia.Dataset
-
-
-
await client.datasets.create({ name: "name", });
-
-
-
request:
Cartesia.CreateDatasetRequest
-
requestOptions:
Datasets.RequestOptions
-
-
client.datasets.listFiles(id) -> Cartesia.PaginatedDatasetFiles
-
-
-
await client.datasets.listFiles("id");
-
-
-
id:
string
-
requestOptions:
Datasets.RequestOptions
-
-
client.infill.bytes(leftAudio, rightAudio, { ...params }) -> stream.Readable
-
-
-
Generate audio that smoothly connects two existing audio segments. This is useful for inserting new speech between existing speech segments while maintaining natural transitions.
The cost is 1 credit per character of the infill text plus a fixed cost of 300 credits.
Only the `sonic-preview` model is supported for infill at this time.
At least one of `left_audio` or `right_audio` must be provided.
As with all generative models, there's some inherent variability, but here are some tips we recommend to get the best results from infill:
- Use longer infill transcripts
  - This gives the model more flexibility to adapt to the rest of the audio
- Target natural pauses in the audio when deciding where to clip
  - This means you don't need word-level timestamps to be as precise
- Clip right up to the start and end of the audio segment you want infilled, keeping as much silence in the left/right audio segments as possible
  - This helps the model generate more natural transitions
-
-
-
await client.infill.bytes(fs.createReadStream("/path/to/your/file"), fs.createReadStream("/path/to/your/file"), { modelId: "sonic-preview", language: "en", transcript: "middle segment", voiceId: "694f9389-aac1-45b6-b726-9d9369183238", outputFormatContainer: "mp3", outputFormatSampleRate: 44100, outputFormatBitRate: 128000, voiceExperimentalControlsSpeed: "slowest", voiceExperimentalControlsEmotion: ["surprise:high", "curiosity:high"], });
-
-
-
leftAudio:
File | fs.ReadStream | Blob
-
rightAudio:
File | fs.ReadStream | Blob
-
request:
Cartesia.InfillBytesRequest
-
requestOptions:
Infill.RequestOptions
-
-
client.tts.bytes({ ...params }) -> stream.Readable
-
-
-
await client.tts.bytes({ modelId: "sonic-english", transcript: "Hello, world!", voice: { mode: "id", id: "694f9389-aac1-45b6-b726-9d9369183238", }, language: "en", outputFormat: { container: "mp3", sampleRate: 44100, bitRate: 128000, }, });
-
-
-
request:
Cartesia.TtsRequest
-
requestOptions:
Tts.RequestOptions
-
-
client.tts.sse({ ...params }) -> core.Stream
-
-
-
const response = await client.tts.sse({ modelId: "sonic-english", transcript: "Hello, world!", voice: { mode: "id", id: "694f9389-aac1-45b6-b726-9d9369183238", }, language: "en", outputFormat: { container: "raw", sampleRate: 44100, encoding: "pcm_f32le", }, }); for await (const item of response) { console.log(item); }
-
-
-
request:
Cartesia.TtsRequest
-
requestOptions:
Tts.RequestOptions
-
-
client.voiceChanger.bytes(clip, { ...params }) -> stream.Readable
-
-
-
Takes an audio file of speech, and returns an audio file of speech spoken with the same intonation, but with a different voice.
This endpoint is priced at 15 characters per second of input audio.
-
-
-
await client.voiceChanger.bytes(fs.createReadStream("/path/to/your/file"), { voiceId: "694f9389-aac1-45b6-b726-9d9369183238", outputFormatContainer: "mp3", outputFormatSampleRate: 44100, outputFormatBitRate: 128000, });
-
-
-
clip:
File | fs.ReadStream | Blob
-
request:
Cartesia.VoiceChangerBytesRequest
-
requestOptions:
VoiceChanger.RequestOptions
-
-
client.voiceChanger.sse(clip, { ...params }) -> core.Stream
-
-
-
const response = await client.voiceChanger.sse(fs.createReadStream("/path/to/your/file"), { voiceId: "694f9389-aac1-45b6-b726-9d9369183238", outputFormatContainer: "mp3", outputFormatSampleRate: 44100, outputFormatBitRate: 128000, }); for await (const item of response) { console.log(item); }
-
-
-
clip:
File | fs.ReadStream | Blob
-
request:
Cartesia.VoiceChangerSseRequest
-
requestOptions:
VoiceChanger.RequestOptions
-
-
client.voices.list() -> Cartesia.Voice[]
-
-
-
await client.voices.list();
-
-
-
requestOptions:
Voices.RequestOptions
-
-
client.voices.clone(clip, { ...params }) -> Cartesia.VoiceMetadata
-
-
-
Clone a voice from an audio clip. This endpoint has two modes, stability and similarity.
Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
-
-
-
await client.voices.clone(fs.createReadStream("/path/to/your/file"), { name: "A high-stability cloned voice", description: "Copied from Cartesia docs", mode: "stability", language: "en", enhance: true, });
-
-
-
clip:
File | fs.ReadStream | Blob
-
request:
Cartesia.CloneVoiceRequest
-
requestOptions:
Voices.RequestOptions
-
-
client.voices.delete(id) -> void
-
-
-
await client.voices.delete("id");
-
-
-
id:
Cartesia.VoiceId
-
requestOptions:
Voices.RequestOptions
-
-
client.voices.update(id, { ...params }) -> Cartesia.Voice
-
-
-
await client.voices.update("id", { name: "name", description: "description", });
-
-
-
id:
Cartesia.VoiceId
-
request:
Cartesia.UpdateVoiceRequest
-
requestOptions:
Voices.RequestOptions
-
-
client.voices.get(id) -> Cartesia.Voice
-
-
-
await client.voices.get("id");
-
-
-
id:
Cartesia.VoiceId
-
requestOptions:
Voices.RequestOptions
-
-
client.voices.localize({ ...params }) -> Cartesia.EmbeddingResponse
-
-
-
await client.voices.localize({ embedding: [1.1, 1.1], language: "en", originalSpeakerGender: "male", dialect: undefined, });
-
-
-
request:
Cartesia.LocalizeVoiceRequest
-
requestOptions:
Voices.RequestOptions
-
-
client.voices.mix({ ...params }) -> Cartesia.EmbeddingResponse
-
-
-
await client.voices.mix({ voices: [ { id: "id", weight: 1.1, }, { id: "id", weight: 1.1, }, ], });
-
-
-
request:
Cartesia.MixVoicesRequest
-
requestOptions:
Voices.RequestOptions
-
-
client.voices.create({ ...params }) -> Cartesia.Voice
-
-
-
Create voice from raw features. If you'd like to clone a voice from an audio file, please use Clone Voice instead.
-
-
-
await client.voices.create({ name: "My Custom Voice", description: "A custom voice created through the API", embedding: [], language: "en", baseVoiceId: "123e4567-e89b-12d3-a456-426614174000", });
-
-
-
request:
Cartesia.CreateVoiceRequest
-
requestOptions:
Voices.RequestOptions
-
-