
Unit tests verify individual components work. Integration tests verify they work together. For voice AI, this means testing the full flow from user speech to agent response - without actual microphones or speakers.
The Challenge
Real voice testing is:
- Slow: Audio processing takes time
- Flaky: Background noise, mic issues
- Expensive: STT/TTS API calls cost money
- Non-deterministic: Same phrase, different transcriptions
The solution? Mock voice services that give you deterministic, fast, free testing.
Architecture Overview
┌─────────────────────────────────────────────────────────────┐
│ Test Environment │
├─────────────────────────────────────────────────────────────┤
│ │
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
│ │ Mock STT │───▶│ Your Agent │───▶│ Mock TTS │ │
│ │ (Text input) │ │ (Real logic) │ │ (Text output)│ │
│ └──────────────┘ └──────────────┘ └──────────────┘ │
│ ▲ │ │ │
│ │ ▼ ▼ │
│ Test provides Tool calls Assertions on │
│ transcripts (real or mock) spoken text │
│ │
└─────────────────────────────────────────────────────────────┘
Setting Up Mock Services
Mock Speech-to-Text
// test/mocks/mock-stt.ts
export class MockSTT {
private transcriptQueue: string[] = [];
private listeners: Map<string, Function[]> = new Map();
// Queue up what the "user" will say
queueTranscript(text: string) {
this.transcriptQueue.push(text);
}
queueTranscripts(texts: string[]) {
this.transcriptQueue.push(...texts);
}
// Simulate user speaking
async simulateSpeech(): Promise<string> {
const transcript = this.transcriptQueue.shift();
if (!transcript) {
throw new Error('No transcript queued');
}
// Simulate realistic timing
await this.delay(50);
this.emit('transcript', { text: transcript, isFinal: true });
return transcript;
}
on(event: string, callback: Function) {
if (!this.listeners.has(event)) {
this.listeners.set(event, []);
}
this.listeners.get(event)!.push(callback);
}
private emit(event: string, data: any) {
this.listeners.get(event)?.forEach(cb => cb(data));
}
private delay(ms: number) {
return new Promise(resolve => setTimeout(resolve, ms));
}
} Mock Text-to-Speech
// test/mocks/mock-tts.ts
export class MockTTS {
public spokenTexts: string[] = [];
private onSpeakCallback?: (text: string) => void;
async speak(text: string): Promise<void> {
this.spokenTexts.push(text);
this.onSpeakCallback?.(text);
// Simulate speech duration (rough estimate)
const duration = text.length * 10; // ~10ms per character
await new Promise(resolve => setTimeout(resolve, Math.min(duration, 100)));
}
onSpeak(callback: (text: string) => void) {
this.onSpeakCallback = callback;
}
getLastSpoken(): string | undefined {
return this.spokenTexts[this.spokenTexts.length - 1];
}
getAllSpoken(): string[] {
return [...this.spokenTexts];
}
clear() {
this.spokenTexts = [];
}
} Writing Integration Tests
Basic Conversation Flow
// test/integration/conversation-flow.test.ts
import { describe, it, expect, beforeEach } from 'vitest';
import { VoiceAgent } from '../src/voice-agent';
import { MockSTT } from './mocks/mock-stt';
import { MockTTS } from './mocks/mock-tts';
describe('Conversation Flow Integration', () => {
let agent: VoiceAgent;
let mockSTT: MockSTT;
let mockTTS: MockTTS;
beforeEach(() => {
mockSTT = new MockSTT();
mockTTS = new MockTTS();
agent = new VoiceAgent({
stt: mockSTT,
tts: mockTTS,
// Use real conversation logic, just mock I/O
});
});
it('should complete a booking flow', async () => {
// Queue the user's responses
mockSTT.queueTranscripts([
'I want to book a meeting',
'Tomorrow at 2pm',
'Yes, that works',
]);
// Start the conversation
await agent.start();
// Simulate the conversation
await mockSTT.simulateSpeech(); // "I want to book a meeting"
await agent.waitForResponse();
await mockSTT.simulateSpeech(); // "Tomorrow at 2pm"
await agent.waitForResponse();
await mockSTT.simulateSpeech(); // "Yes, that works"
await agent.waitForResponse();
// Verify the agent's responses
const responses = mockTTS.getAllSpoken();
expect(responses[0]).toContain('book');
expect(responses[1]).toContain('2pm');
expect(responses[2]).toContain('confirmed');
});
it('should handle clarification requests', async () => {
mockSTT.queueTranscripts([
'Schedule something', // Vague request
'A meeting with the team', // Clarification
]);
await agent.start();
await mockSTT.simulateSpeech();
await agent.waitForResponse();
// Agent should ask for clarification
expect(mockTTS.getLastSpoken()).toMatch(/what|when|who|which/i);
await mockSTT.simulateSpeech();
await agent.waitForResponse();
// Now agent should proceed
expect(mockTTS.getLastSpoken()).not.toMatch(/what|when|who|which/i);
});
}); Testing Tool Integration
// test/integration/tool-integration.test.ts
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { VoiceAgent } from '../src/voice-agent';
import { MockSTT } from './mocks/mock-stt';
import { MockTTS } from './mocks/mock-tts';
describe('Tool Integration', () => {
let agent: VoiceAgent;
let mockSTT: MockSTT;
let mockTTS: MockTTS;
let mockCalendarAPI: any;
beforeEach(() => {
mockSTT = new MockSTT();
mockTTS = new MockTTS();
mockCalendarAPI = {
getEvents: vi.fn().mockResolvedValue([
{ title: 'Team Standup', time: '10:00 AM' },
{ title: 'Product Review', time: '2:00 PM' },
]),
createEvent: vi.fn().mockResolvedValue({ id: 'evt_123' }),
};
agent = new VoiceAgent({
stt: mockSTT,
tts: mockTTS,
tools: { calendar: mockCalendarAPI },
});
});
it('should fetch and speak calendar events', async () => {
mockSTT.queueTranscript("What's on my calendar today?");
await agent.start();
await mockSTT.simulateSpeech();
await agent.waitForResponse();
// Verify API was called
expect(mockCalendarAPI.getEvents).toHaveBeenCalled();
// Verify response mentions the events
const response = mockTTS.getLastSpoken()!;
expect(response).toContain('Team Standup');
expect(response).toContain('Product Review');
});
it('should create calendar events from voice', async () => {
mockSTT.queueTranscripts([
'Schedule a meeting with Sarah tomorrow at 3pm',
'Yes, confirm it',
]);
await agent.start();
await mockSTT.simulateSpeech();
await agent.waitForResponse();
await mockSTT.simulateSpeech();
await agent.waitForResponse();
// Verify event was created
expect(mockCalendarAPI.createEvent).toHaveBeenCalledWith(
expect.objectContaining({
title: expect.stringContaining('Sarah'),
time: expect.stringContaining('3'),
})
);
});
}); Testing Error Recovery
// test/integration/error-recovery.test.ts
describe('Error Recovery', () => {
it('should recover from API failures gracefully', async () => {
const failingAPI = {
getEvents: vi.fn()
.mockRejectedValueOnce(new Error('Network error'))
.mockResolvedValueOnce([{ title: 'Meeting' }]),
};
const agent = new VoiceAgent({
stt: mockSTT,
tts: mockTTS,
tools: { calendar: failingAPI },
});
mockSTT.queueTranscripts([
"What's on my calendar?",
"Try again",
]);
await agent.start();
// First attempt fails
await mockSTT.simulateSpeech();
await agent.waitForResponse();
expect(mockTTS.getLastSpoken()).toMatch(/trouble|sorry|try again/i);
// Retry succeeds
await mockSTT.simulateSpeech();
await agent.waitForResponse();
expect(mockTTS.getLastSpoken()).toContain('Meeting');
});
it('should handle STT failures', async () => {
mockSTT.on('error', () => {});
// Simulate STT error
await agent.start();
agent.handleSTTError(new Error('Microphone unavailable'));
expect(mockTTS.getLastSpoken()).toMatch(/hear|microphone|trouble/i);
});
}); Test Utilities
Conversation Test Helper
// test/helpers/conversation-tester.ts
export class ConversationTester {
private agent: VoiceAgent;
private mockSTT: MockSTT;
private mockTTS: MockTTS;
constructor(agentConfig: Partial<VoiceAgentConfig> = {}) {
this.mockSTT = new MockSTT();
this.mockTTS = new MockTTS();
this.agent = new VoiceAgent({
stt: this.mockSTT,
tts: this.mockTTS,
...agentConfig,
});
}
async say(text: string): Promise<string> {
this.mockSTT.queueTranscript(text);
await this.mockSTT.simulateSpeech();
await this.agent.waitForResponse();
return this.mockTTS.getLastSpoken()!;
}
async conversation(exchanges: Array<{ user: string; expectContains?: string[] }>) {
await this.agent.start();
for (const exchange of exchanges) {
const response = await this.say(exchange.user);
if (exchange.expectContains) {
for (const expected of exchange.expectContains) {
expect(response.toLowerCase()).toContain(expected.toLowerCase());
}
}
}
}
getAllResponses(): string[] {
return this.mockTTS.getAllSpoken();
}
}
// Usage in tests
it('should handle multi-turn conversation', async () => {
const tester = new ConversationTester();
await tester.conversation([
{ user: 'Hello', expectContains: ['hi', 'hello', 'hey'] },
{ user: 'Book a meeting', expectContains: ['when', 'time'] },
{ user: 'Tomorrow at 3', expectContains: ['3', 'pm', 'tomorrow'] },
]);
}); CI Configuration
# .github/workflows/integration-tests.yml
name: Integration Tests
on:
push:
branches: [main]
pull_request:
jobs:
integration:
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@v4
- uses: pnpm/action-setup@v2
- uses: actions/setup-node@v4
with:
node-version: '20'
cache: 'pnpm'
- run: pnpm install
- run: pnpm test:integration
env:
NODE_ENV: test Best Practices
- Keep mocks simple - Only mock what's necessary
- Test real logic - Don't mock your own code
- Use realistic data - Transcripts should match real user speech patterns
- Test edge cases - Empty responses, timeouts, interruptions
- Measure coverage - Aim for 80%+ on integration paths
Next Steps
- E2E Testing: Full browser-based testing with Playwright
- Load Testing: Test concurrent conversation handling
- Chaos Testing: Simulate random failures