diff --git a/extensions/tts-local-cli/speech-provider.test.ts b/extensions/tts-local-cli/speech-provider.test.ts index 6ea204a7638..da80ecdfb80 100644 --- a/extensions/tts-local-cli/speech-provider.test.ts +++ b/extensions/tts-local-cli/speech-provider.test.ts @@ -73,6 +73,28 @@ async function synthesize(params: { }); } +function parseAudioPayload(result: { audioBuffer: Buffer }) { + return JSON.parse(result.audioBuffer.toString("utf8")) as { + stdin?: string; + textArg?: string; + }; +} + +function requireFfmpegArgs(index = 0) { + const args = runFfmpegMock.mock.calls[index]?.[0]; + if (!args) { + throw new Error(`runFfmpeg call ${index} missing`); + } + return args; +} + +function expectArgsContainSequence(args: string[], sequence: string[]) { + const startIndex = args.findIndex((arg, index) => + sequence.every((expected, offset) => args[index + offset] === expected), + ); + expect(startIndex).toBeGreaterThanOrEqual(0); +} + describe("buildCliSpeechProvider", () => { beforeEach(() => { runFfmpegMock.mockImplementation(async (args) => { @@ -127,15 +149,12 @@ describe("buildCliSpeechProvider", () => { text: "hello 😀 world", }); - expect(result).toMatchObject({ - outputFormat: "mp3", - fileExtension: ".mp3", - voiceCompatible: false, - }); - expect(JSON.parse(result.audioBuffer.toString("utf8"))).toMatchObject({ - stdin: "hello world", - textArg: "", - }); + expect(result.outputFormat).toBe("mp3"); + expect(result.fileExtension).toBe(".mp3"); + expect(result.voiceCompatible).toBe(false); + const audioPayload = parseAudioPayload(result); + expect(audioPayload.stdin).toBe("hello world"); + expect(audioPayload.textArg).toBe(""); expect(runFfmpegMock).not.toHaveBeenCalled(); } finally { rmSync(fixture.dir, { recursive: true, force: true }); @@ -153,15 +172,12 @@ describe("buildCliSpeechProvider", () => { text: "spoken words", }); - expect(result).toMatchObject({ - outputFormat: "wav", - fileExtension: ".wav", - voiceCompatible: false, - }); - expect(JSON.parse(result.audioBuffer.toString("utf8"))).toMatchObject({ - stdin: "", - textArg: "spoken words", - }); + expect(result.outputFormat).toBe("wav"); + expect(result.fileExtension).toBe(".wav"); + expect(result.voiceCompatible).toBe(false); + const audioPayload = parseAudioPayload(result); + expect(audioPayload.stdin).toBe(""); + expect(audioPayload.textArg).toBe("spoken words"); } finally { rmSync(fixture.dir, { recursive: true, force: true }); } @@ -184,9 +200,7 @@ describe("buildCliSpeechProvider", () => { fileExtension: ".ogg", voiceCompatible: true, }); - expect(runFfmpegMock).toHaveBeenCalledWith( - expect.arrayContaining(["-c:a", "libopus", "-b:a", "64k"]), - ); + expectArgsContainSequence(requireFfmpegArgs(), ["-c:a", "libopus", "-b:a", "64k"]); } finally { rmSync(fixture.dir, { recursive: true, force: true }); } @@ -208,9 +222,7 @@ describe("buildCliSpeechProvider", () => { fileExtension: ".mp3", voiceCompatible: false, }); - expect(runFfmpegMock).toHaveBeenCalledWith( - expect.arrayContaining(["-c:a", "libmp3lame", "-b:a", "128k"]), - ); + expectArgsContainSequence(requireFfmpegArgs(), ["-c:a", "libmp3lame", "-b:a", "128k"]); } finally { rmSync(fixture.dir, { recursive: true, force: true }); } @@ -234,9 +246,7 @@ describe("buildCliSpeechProvider", () => { outputFormat: "pcm", sampleRate: 16000, }); - expect(runFfmpegMock).toHaveBeenCalledWith( - expect.arrayContaining(["-ar", "16000", "-ac", "1", "-f", "s16le"]), - ); + expectArgsContainSequence(requireFfmpegArgs(), ["-ar", "16000", "-ac", "1", "-f", "s16le"]); } finally { rmSync(fixture.dir, { recursive: true, force: true }); }