
Voice AI debugging is uniquely challenging. Unlike web apps where you can inspect network requests, voice applications involve audio streams, real-time transcription, LLM reasoning, and audio synthesis - all happening in milliseconds. This guide gives you a systematic approach to find and fix issues fast.
The Voice AI Debug Stack
┌─────────────────────────────────────────────────────────────┐
│ Where Bugs Hide │
├─────────────────────────────────────────────────────────────┤
│ │
│ Layer 1: Audio Input │
│ └─ Microphone issues, noise, gain levels │
│ │
│ Layer 2: Speech-to-Text │
│ └─ Transcription errors, language detection, latency │
│ │
│ Layer 3: Understanding │
│ └─ Intent misclassification, entity extraction failures │
│ │
│ Layer 4: Agent Logic │
│ └─ Wrong tool calls, context loss, state corruption │
│ │
│ Layer 5: Response Generation │
│ └─ Hallucinations, wrong tone, missing information │
│ │
│ Layer 6: Text-to-Speech │
│ └─ Pronunciation, pacing, voice selection │
│ │
│ Layer 7: Audio Output │
│ └─ Playback issues, latency, interruption handling │
│ │
└─────────────────────────────────────────────────────────────┘
Essential Debugging Tools
1. Conversation Replay
The most powerful debugging tool is conversation replay - recording everything and playing it back:
// src/debug/conversation-recorder.ts
export class ConversationRecorder {
private events: ConversationEvent[] = [];
private sessionId: string;
constructor() {
this.sessionId = crypto.randomUUID();
}
record(event: ConversationEvent) {
this.events.push({
...event,
timestamp: Date.now(),
sessionId: this.sessionId,
});
}
// Record user speech
recordUserSpeech(transcript: string, audioLevel: number) {
this.record({
type: 'user_speech',
data: { transcript, audioLevel },
});
}
// Record agent response
recordAgentResponse(text: string, toolCalls?: any[]) {
this.record({
type: 'agent_response',
data: { text, toolCalls },
});
}
// Record tool execution
recordToolCall(name: string, args: any, result: any, duration: number) {
this.record({
type: 'tool_call',
data: { name, args, result, duration },
});
}
// Export for analysis
export(): string {
return JSON.stringify({
sessionId: this.sessionId,
events: this.events,
duration: this.events.length > 0
? this.events[this.events.length - 1].timestamp - this.events[0].timestamp
: 0,
}, null, 2);
}
// Upload to debugging service
async upload(): Promise<string> {
const response = await fetch('/api/debug/sessions', {
method: 'POST',
body: this.export(),
});
const { url } = await response.json();
return url; // https://debug.ferni.ai/sessions/abc123
}
} 2. Real-time Debug Panel
Build a debug panel that shows what's happening in real-time:
// src/debug/debug-panel.ts
export class DebugPanel {
private container: HTMLElement;
constructor() {
this.container = this.createPanel();
this.attachToDOM();
}
private createPanel(): HTMLElement {
const panel = document.createElement('div');
panel.id = 'ferni-debug-panel';
panel.innerHTML = `
<div class="debug-header">
<span>Ferni Debug</span>
<button onclick="this.parentElement.parentElement.classList.toggle('collapsed')">
Toggle
</button>
</div>
<div class="debug-sections">
<div class="debug-section" id="debug-audio">
<h4>Audio</h4>
<div class="audio-meter"></div>
<span class="audio-status">Listening...</span>
</div>
<div class="debug-section" id="debug-transcript">
<h4>Transcript</h4>
<pre class="transcript-output"></pre>
</div>
<div class="debug-section" id="debug-agent">
<h4>Agent State</h4>
<pre class="agent-state"></pre>
</div>
<div class="debug-section" id="debug-tools">
<h4>Tool Calls</h4>
<div class="tool-calls-list"></div>
</div>
<div class="debug-section" id="debug-latency">
<h4>Latency</h4>
<div class="latency-bars"></div>
</div>
</div>
`;
return panel;
}
updateAudioLevel(level: number) {
const meter = this.container.querySelector('.audio-meter') as HTMLElement;
meter.style.width = `${level * 100}%`;
}
updateTranscript(text: string, isFinal: boolean) {
const output = this.container.querySelector('.transcript-output')!;
output.textContent = text;
output.classList.toggle('final', isFinal);
}
updateAgentState(state: any) {
const stateEl = this.container.querySelector('.agent-state')!;
stateEl.textContent = JSON.stringify(state, null, 2);
}
addToolCall(name: string, duration: number, success: boolean) {
const list = this.container.querySelector('.tool-calls-list')!;
const item = document.createElement('div');
item.className = `tool-call ${success ? 'success' : 'error'}`;
item.innerHTML = `
<span class="tool-name">${name}</span>
<span class="tool-duration">${duration}ms</span>
`;
list.appendChild(item);
}
} 3. Structured Logging
Use structured logging to trace issues across the stack:
// src/debug/logger.ts
import pino from 'pino';

// Root structured logger. Every entry carries the service name and package
// version so logs from different deployments can be told apart.
export const logger = pino({
  // LOG_LEVEL env var overrides the default 'info'. (`||` is deliberate:
  // an empty LOG_LEVEL falls back to 'info' rather than an invalid level.)
  level: process.env.LOG_LEVEL || 'info',
  formatters: {
    // Emit the level as its label ('info') instead of pino's numeric value.
    level: (label) => ({ level: label }),
  },
  base: {
    service: 'voice-agent',
    version: process.env.npm_package_version,
  },
});
// Create child loggers for each component — each entry is tagged with the
// component it came from, so one conversation can be traced across the
// audio -> STT -> agent -> TTS stack.
export const audioLogger = logger.child({ component: 'audio' });
export const sttLogger = logger.child({ component: 'stt' });
export const agentLogger = logger.child({ component: 'agent' });
export const ttsLogger = logger.child({ component: 'tts' });
// Usage
audioLogger.debug({ level: 0.7, noiseFloor: 0.1 }, 'Audio level detected');
sttLogger.info({ transcript: 'Hello', confidence: 0.95 }, 'Transcript received');
agentLogger.warn({ context: ctx }, 'Context approaching token limit');
Debugging by Symptom
"The agent doesn't respond"
Checklist:
Check audio input
// Is the microphone working?
audioLogger.debug({
  level: audioMeter.getLevel(),
  isActive: audioMeter.isActive,
}, 'Audio check');
Check STT connection
// Is transcription working?
sttLogger.debug({
  connected: stt.isConnected,
  lastTranscript: stt.getLastTranscript(),
  timeSinceLastTranscript: Date.now() - stt.lastTranscriptTime,
}, 'STT check');
Check agent state
// Is the agent stuck?
agentLogger.debug({
  state: agent.getState(),
  pendingToolCalls: agent.getPendingToolCalls(),
  lastActivity: agent.getLastActivityTime(),
}, 'Agent check');
"The agent misunderstands me"
Debug STT quality:
// Log all transcripts with confidence scores
stt.on('transcript', (event) => {
sttLogger.info({
transcript: event.text,
confidence: event.confidence,
alternatives: event.alternatives,
language: event.detectedLanguage,
}, 'Transcript received');
// Flag low-confidence transcripts
if (event.confidence < 0.7) {
sttLogger.warn({
transcript: event.text,
confidence: event.confidence,
}, 'Low confidence transcript - may cause issues');
}
}); Debug intent classification:
// Log intent detection results — capture what the classifier saw and the
// runners-up so misclassifications can be diagnosed after the fact.
const intentLog = {
  userInput: transcript,
  detectedIntent: intent.name,
  confidence: intent.confidence,
  entities: intent.entities,
  alternativeIntents: intent.alternatives,
};
agentLogger.info(intentLog, 'Intent classified');

// "The agent calls the wrong tool"
Trace tool selection:
// Before tool call — record what was available and why this tool was picked.
const selectionLog = {
  availableTools: tools.map((t) => t.name),
  userIntent: intent,
  selectedTool: selectedTool.name,
  selectionReason: selectedTool.reason,
};
agentLogger.info(selectionLog, 'Tool selection');

// After tool call — record the arguments, result, and how long it took.
const executionLog = {
  tool: selectedTool.name,
  args: toolArgs,
  result: toolResult,
  duration: toolDuration,
  success: !toolResult.error,
};
agentLogger.info(executionLog, 'Tool execution');

// "Responses are slow"
Add timing instrumentation:
// src/debug/timing.ts
export class TimingTracer {
private marks: Map<string, number> = new Map();
mark(name: string) {
this.marks.set(name, performance.now());
}
measure(name: string, startMark: string): number {
const start = this.marks.get(startMark);
if (!start) return -1;
const duration = performance.now() - start;
logger.debug({
metric: name,
duration,
startMark,
}, 'Timing measurement');
return duration;
}
// Use in conversation flow
async traceConversation(input: string): Promise<void> {
this.mark('start');
this.mark('stt_start');
const transcript = await stt.transcribe(input);
const sttDuration = this.measure('stt', 'stt_start');
this.mark('agent_start');
const response = await agent.process(transcript);
const agentDuration = this.measure('agent', 'agent_start');
this.mark('tts_start');
await tts.speak(response);
const ttsDuration = this.measure('tts', 'tts_start');
const totalDuration = this.measure('total', 'start');
logger.info({
stt: sttDuration,
agent: agentDuration,
tts: ttsDuration,
total: totalDuration,
}, 'Conversation timing breakdown');
}
} Advanced Debugging Techniques
1. Conversation Diffing
Compare expected vs actual conversation flows:
// test/debug/conversation-diff.ts
export function diffConversation(
expected: ConversationTurn[],
actual: ConversationTurn[]
): ConversationDiff[] {
const diffs: ConversationDiff[] = [];
for (let i = 0; i < Math.max(expected.length, actual.length); i++) {
const exp = expected[i];
const act = actual[i];
if (!exp) {
diffs.push({ type: 'extra', turn: i, actual: act });
} else if (!act) {
diffs.push({ type: 'missing', turn: i, expected: exp });
} else if (!turnsMatch(exp, act)) {
diffs.push({ type: 'mismatch', turn: i, expected: exp, actual: act });
}
}
return diffs;
} 2. State Time Travel
Capture and replay state at any point:
// src/debug/state-time-travel.ts
export class StateTimeTravel {
private snapshots: StateSnapshot[] = [];
private maxSnapshots = 100;
snapshot(state: AgentState) {
this.snapshots.push({
timestamp: Date.now(),
state: structuredClone(state),
});
if (this.snapshots.length > this.maxSnapshots) {
this.snapshots.shift();
}
}
getSnapshotAt(timestamp: number): StateSnapshot | undefined {
return this.snapshots.find(s => s.timestamp >= timestamp);
}
replayFrom(timestamp: number): StateSnapshot[] {
return this.snapshots.filter(s => s.timestamp >= timestamp);
}
exportTimeline(): string {
return JSON.stringify(this.snapshots, null, 2);
}
} 3. Audio Waveform Analysis
Debug audio issues visually:
// src/debug/audio-visualizer.ts
export class AudioVisualizer {
private canvas: HTMLCanvasElement;
private ctx: CanvasRenderingContext2D;
private analyser: AnalyserNode;
visualize() {
const bufferLength = this.analyser.frequencyBinCount;
const dataArray = new Uint8Array(bufferLength);
const draw = () => {
requestAnimationFrame(draw);
this.analyser.getByteTimeDomainData(dataArray);
this.ctx.fillStyle = '#1a1a2e';
this.ctx.fillRect(0, 0, this.canvas.width, this.canvas.height);
this.ctx.lineWidth = 2;
this.ctx.strokeStyle = '#4a6741';
this.ctx.beginPath();
const sliceWidth = this.canvas.width / bufferLength;
let x = 0;
for (let i = 0; i < bufferLength; i++) {
const v = dataArray[i] / 128.0;
const y = (v * this.canvas.height) / 2;
if (i === 0) {
this.ctx.moveTo(x, y);
} else {
this.ctx.lineTo(x, y);
}
x += sliceWidth;
}
this.ctx.lineTo(this.canvas.width, this.canvas.height / 2);
this.ctx.stroke();
};
draw();
}
} Production Debugging
Error Reporting Integration
// src/debug/error-reporter.ts
import * as Sentry from '@sentry/node';
export function initErrorReporting() {
Sentry.init({
dsn: process.env.SENTRY_DSN,
environment: process.env.NODE_ENV,
tracesSampleRate: 0.1,
});
}
export function reportConversationError(
error: Error,
context: ConversationContext
) {
Sentry.withScope((scope) => {
scope.setTag('component', 'voice-agent');
scope.setContext('conversation', {
sessionId: context.sessionId,
turnCount: context.turnCount,
lastIntent: context.lastIntent,
});
scope.setContext('audio', {
sttProvider: context.sttProvider,
ttsProvider: context.ttsProvider,
audioQuality: context.audioQuality,
});
Sentry.captureException(error);
});
} Debug Checklist
When something goes wrong, work through this checklist:
- [ ] Check browser console for errors
- [ ] Verify microphone permissions granted
- [ ] Check network tab for failed API calls
- [ ] Review conversation recording
- [ ] Check STT confidence scores
- [ ] Verify tool call arguments
- [ ] Check response latency breakdown
- [ ] Review agent state at time of issue
- [ ] Check for token limit issues
- [ ] Verify TTS audio is playing
Need help? Share your debug recording in Discord and we'll help diagnose the issue.