From 5eaa72fafa09cee522c0fbf66012012e92e9e890 Mon Sep 17 00:00:00 2001 From: boli-bohan Date: Mon, 6 Jan 2025 11:39:39 -0800 Subject: [PATCH] Revert "add loudness docs" (#57) --- fern/definition/tts.yml | 85 +++++++++-------------------------------- 1 file changed, 18 insertions(+), 67 deletions(-) diff --git a/fern/definition/tts.yml b/fern/definition/tts.yml index a908215..b5bd4e8 100644 --- a/fern/definition/tts.yml +++ b/fern/definition/tts.yml @@ -12,7 +12,7 @@ service: path: /bytes method: POST display-name: Text to Speech (Bytes) - request: TTSBytesRequest + request: TTSRequest response: file examples: - name: MP3 @@ -27,7 +27,6 @@ service: container: "mp3" sample_rate: 44100 bit_rate: 128000 - loudness: -17 - name: WAV request: model_id: "sonic-english" @@ -40,7 +39,6 @@ service: container: "wav" sample_rate: 44100 encoding: "pcm_f32le" - loudness: -17 - name: RAW request: model_id: "sonic-english" @@ -53,13 +51,12 @@ service: container: "raw" sample_rate: 44100 encoding: "pcm_f32le" - loudness: -17 sse: path: /sse method: POST display-name: Text to Speech (SSE) - request: TTSSSERequest + request: TTSRequest response-stream: type: WebSocketResponse format: sse @@ -325,6 +322,22 @@ types: continue: optional context_id: optional + TTSRequest: + properties: + model_id: + type: string + docs: | + The ID of the model to use for the generation. See [Models](/build-with-sonic/models) for available models. + transcript: string + voice: TTSRequestVoiceSpecifier + language: optional + output_format: OutputFormat + duration: + type: optional + docs: | + The maximum duration of the audio in seconds. You do not usually need to specify this. + If the duration is not appropriate for the length of the transcript, the output audio may be truncated. + SupportedLanguage: docs: | The language that the given voice should speak the transcript in. @@ -359,15 +372,6 @@ types: encoding: RawEncoding sample_rate: integer - RawBytesOutputFormat: - extends: RawOutputFormat - properties: - loudness: - type: optional - docs: | - The loudness of the audio in LUFS. - Supports values between -14 (loudest) and -24 (quietest). - RawEncoding: enum: - pcm_f32le @@ -378,9 +382,6 @@ types: WAVOutputFormat: extends: RawOutputFormat - WAVBytesOutputFormat: - extends: RawBytesOutputFormat - MP3OutputFormat: properties: sample_rate: integer @@ -388,11 +389,6 @@ types: type: integer docs: | The bit rate of the audio in bits per second. Supported bit rates are 32000, 64000, 96000, 128000, 192000. - loudness: - type: optional - docs: | - The loudness of the audio in LUFS. - Supports values between -14 (loudest) and -24 (quietest). TTSRequestVoiceSpecifier: discriminated: false @@ -478,48 +474,3 @@ types: name: CURIOSITY_HIGH - value: curiosity:highest name: CURIOSITY_HIGHEST - - TTSBytesRequest: - properties: - model_id: - type: string - docs: | - The ID of the model to use for the generation. See [Models](/build-with-sonic/models) for available models. - transcript: string - voice: TTSRequestVoiceSpecifier - language: optional - output_format: BytesOutputFormat - duration: - type: optional - docs: | - The maximum duration of the audio in seconds. You do not usually need to specify this. - If the duration is not appropriate for the length of the transcript, the output audio may be truncated. - - BytesOutputFormat: - discriminant: container - union: - raw: RawBytesOutputFormat - wav: WAVBytesOutputFormat - mp3: MP3OutputFormat - - TTSSSERequest: - properties: - model_id: - type: string - docs: | - The ID of the model to use for the generation. See [Models](/build-with-sonic/models) for available models. - transcript: string - voice: TTSRequestVoiceSpecifier - language: optional - output_format: SSEOutputFormat - duration: - type: optional - docs: | - The maximum duration of the audio in seconds. You do not usually need to specify this. - If the duration is not appropriate for the length of the transcript, the output audio may be truncated. - - SSEOutputFormat: - discriminant: container - union: - raw: RawOutputFormat - wav: WAVOutputFormat