Use OpenAI or Claude to generate conversational responses with streaming.
// OpenAI SDK client (CommonJS). The API key is read from the environment
// so no secret is hard-coded in the source.
const OpenAI = require('openai');
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY
});
/**
 * Stream an OpenAI chat completion, yielding text chunks as they arrive.
 *
 * @param {Array<{role: string, content: string}>} messages - Conversation so far.
 * @param {string} [model='gpt-4'] - Chat model to use (was hard-coded).
 * @yields {string} Non-empty content deltas, in arrival order.
 */
async function* streamCompletion(messages, model = 'gpt-4') {
  const stream = await openai.chat.completions.create({
    model,
    messages,
    stream: true,
  });
  for await (const chunk of stream) {
    // Chunks without content (role headers, finish markers) are skipped.
    const content = chunk.choices[0]?.delta?.content;
    if (content) {
      yield content;
    }
  }
}
// Usage
// NOTE(review): top-level `for await` only works in an ES module or inside an
// async function; this file uses require() (CommonJS), so wrap this loop in an
// async function. `messages` and `ttsService` are assumed to be provided by
// the surrounding application — verify against the caller.
for await (const text of streamCompletion(messages)) {
// Send each chunk to TTS immediately
ttsService.speak(text);
}
// Anthropic SDK client (CommonJS). The API key is read from the environment
// so no secret is hard-coded in the source.
const Anthropic = require('@anthropic-ai/sdk');
const anthropic = new Anthropic({
apiKey: process.env.CLAUDE_API_KEY
});
/**
 * Stream a Claude response, yielding text chunks as they arrive.
 *
 * @param {Array<{role: string, content: string}>} messages - Conversation so far.
 * @param {string} [model='claude-3-sonnet-20240229'] - Model to use (was hard-coded).
 * @yields {string} Text deltas only; tool-use deltas are skipped.
 */
async function* streamClaude(messages, model = 'claude-3-sonnet-20240229') {
  const stream = await anthropic.messages.stream({
    model,
    max_tokens: 1024,
    messages,
  });
  for await (const event of stream) {
    // Guard on delta.type as well: content_block_delta also carries
    // input_json_delta during tool use, whose `.text` is undefined — the
    // original yielded `undefined` for those events.
    if (event.type === 'content_block_delta' && event.delta.type === 'text_delta') {
      yield event.delta.text;
    }
  }
}
Voice conversations need prompting that is tailored to spoken output:
// System prompt tuned for spoken (phone) output: short conversational replies,
// no markdown, and '•' markers at natural pauses — presumably consumed by a
// downstream TTS pipeline as chunking/flush points; verify against the TTS
// integration.
const systemPrompt = `You are a helpful phone assistant.
Guidelines:
- Keep responses brief (1-2 sentences when possible)
- Use conversational language, not formal writing
- Don't use markdown, lists, or formatting
- Add a '•' symbol every 5-10 words at natural pauses
- Ask only one question at a time
- If unclear, ask for clarification
Personality: Friendly and helpful, but efficient.`;
Maintain context across turns:
/**
 * Rolling conversation history for multi-turn chat, pinned to a system prompt.
 */
class ConversationContext {
  /**
   * @param {string} systemPrompt - Always kept as the first message.
   * @param {number} [maxMessages=20] - Window size returned by getMessages()
   *   (was a hard-coded constant inside getMessages).
   */
  constructor(systemPrompt, maxMessages = 20) {
    this.messages = [
      { role: 'system', content: systemPrompt },
    ];
    this.maxMessages = maxMessages;
  }

  /** Append a user turn. */
  addUserMessage(text) {
    this.messages.push({ role: 'user', content: text });
  }

  /** Append an assistant turn. */
  addAssistantMessage(text) {
    this.messages.push({ role: 'assistant', content: text });
  }

  /**
   * Messages to send to the model: the system prompt plus the most recent
   * turns, capped at maxMessages to avoid token limits.
   * @returns {Array<{role: string, content: string}>}
   */
  getMessages() {
    if (this.messages.length > this.maxMessages) {
      return [
        this.messages[0], // keep system prompt
        ...this.messages.slice(1 - this.maxMessages),
      ];
    }
    return this.messages;
  }
}
Let the LLM trigger actions:
// Tool definitions in the OpenAI function-calling schema (JSON Schema
// parameters). The model decides when to call these; the app executes them.
const tools = [
{
type: 'function',
function: {
// Hand the caller off to a human in one of the listed departments.
name: 'transfer_call',
description: 'Transfer the call to a human agent',
parameters: {
type: 'object',
properties: {
department: {
type: 'string',
enum: ['sales', 'support', 'billing']
}
},
required: ['department']
}
}
},
{
type: 'function',
function: {
// Look up an order by its id.
name: 'check_order_status',
description: 'Check the status of an order',
parameters: {
type: 'object',
properties: {
order_id: { type: 'string' }
},
required: ['order_id']
}
}
}
];
// Ask the model for a reply, letting it decide whether to invoke a tool.
// NOTE(review): this top-level `await` needs an async context in CommonJS —
// wrap in an async function.
const response = await openai.chat.completions.create({
model: 'gpt-4',
messages: messages,
tools: tools,
tool_choice: 'auto'
});
// Handle function calls
if (response.choices[0].message.tool_calls) {
for (const call of response.choices[0].message.tool_calls) {
// NOTE(review): call.function.arguments is a JSON-encoded string, not an
// object — presumably executeFunction parses it; verify its implementation.
const result = await executeFunction(call.function.name, call.function.arguments);
// Add result to conversation and continue
// NOTE(review): `result` is not yet appended as a `tool` role message here —
// the follow-up request with tool results still needs to be implemented.
}
}
Don't send incomplete sentences to the LLM:
/**
 * Buffers streaming ASR transcripts so only complete utterances reach the LLM.
 *
 * Fixes two defects in the previous version: (1) non-final (interim) text was
 * never stored, so the timeout branch could never flush anything, and (2) the
 * timeout callback's flushed text was returned into setTimeout and discarded.
 * Timed-out text is now delivered through an optional onFlush callback.
 */
class TranscriptionBuffer {
  /**
   * @param {Object} [options]
   * @param {(text: string) => void} [options.onFlush] - Receives text flushed
   *   by the silence timeout (final results are still returned from add()).
   * @param {number} [options.timeoutMs=1000] - Silence window before flushing.
   */
  constructor({ onFlush = null, timeoutMs = 1000 } = {}) {
    this.buffer = '';
    this.interim = ''; // latest non-final hypothesis
    this.timeout = null;
    this.onFlush = onFlush;
    this.timeoutMs = timeoutMs;
  }

  /**
   * Add a transcription result.
   * @param {string} text - Transcript text.
   * @param {boolean} isFinal - Whether the recognizer marked it final.
   * @returns {string|null} Complete utterance on final results, else null.
   */
  add(text, isFinal) {
    clearTimeout(this.timeout);
    this.timeout = null;
    if (isFinal) {
      // The final result supersedes any interim hypothesis for this segment.
      this.interim = '';
      this.buffer += text + ' ';
      return this.flush();
    }
    // Interim hypotheses replace (not append to) the previous one — ASR
    // engines typically re-emit the whole partial segment each time.
    this.interim = text;
    // If no further input arrives, flush whatever we have via the callback.
    this.timeout = setTimeout(() => {
      this.timeout = null;
      const pending = (this.buffer + this.interim).trim();
      this.buffer = '';
      this.interim = '';
      if (pending) {
        this.onFlush?.(pending);
      }
    }, this.timeoutMs);
    return null;
  }

  /**
   * Drain the finalized buffer.
   * @returns {string|null} Trimmed text, or null if the buffer is empty.
   */
  flush() {
    const text = this.buffer.trim();
    this.buffer = '';
    return text || null;
  }
}
async function safeCompletion(messages, retries = 3) {
for (let i = 0; i < retries; i++) {
try {
return await streamCompletion(messages);
} catch (error) {
if (error.status === 429) {
// Rate limited, wait and retry
await sleep(Math.pow(2, i) * 1000);
continue;
}
throw error;
}
}
throw new Error('Max retries exceeded');
}