Use OpenAI or Claude to generate conversational responses with streaming.
// OpenAI SDK client (CommonJS). The API key is read from the environment
// so no secret is hard-coded in the source.
const OpenAI = require('openai');
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY
});
/**
 * Stream an OpenAI chat completion, yielding text chunks as they arrive.
 *
 * @param {Array<{role: string, content: string}>} messages - Conversation so far.
 * @param {string} [model='gpt-4'] - Chat model to use (was hard-coded).
 * @yields {string} Non-empty content deltas, in arrival order.
 */
async function* streamCompletion(messages, model = 'gpt-4') {
  const stream = await openai.chat.completions.create({
    model,
    messages,
    stream: true,
  });
  for await (const chunk of stream) {
    // Chunks without content (role headers, finish markers) are skipped.
    const content = chunk.choices[0]?.delta?.content;
    if (content) {
      yield content;
    }
  }
}
// Usage
// NOTE(review): top-level `for await` only works in an ES module or inside an
// async function; this file uses require() (CommonJS), so wrap this loop in an
// async function. `messages` and `ttsService` are assumed to be provided by
// the surrounding application — verify against the caller.
for await (const text of streamCompletion(messages)) {
// Send each chunk to TTS immediately
ttsService.speak(text);
}
// Anthropic SDK client (CommonJS). The API key is read from the environment
// so no secret is hard-coded in the source.
const Anthropic = require('@anthropic-ai/sdk');
const anthropic = new Anthropic({
apiKey: process.env.CLAUDE_API_KEY
});
/**
 * Stream a Claude response, yielding text chunks as they arrive.
 *
 * @param {Array<{role: string, content: string}>} messages - Conversation so far.
 * @param {string} [model='claude-3-sonnet-20240229'] - Model to use (was hard-coded).
 * @yields {string} Text deltas only; tool-use deltas are skipped.
 */
async function* streamClaude(messages, model = 'claude-3-sonnet-20240229') {
  const stream = await anthropic.messages.stream({
    model,
    max_tokens: 1024,
    messages,
  });
  for await (const event of stream) {
    // Guard on delta.type as well: content_block_delta also carries
    // input_json_delta during tool use, whose `.text` is undefined — the
    // original yielded `undefined` for those events.
    if (event.type === 'content_block_delta' && event.delta.type === 'text_delta') {
      yield event.delta.text;
    }
  }
}
Voice conversations need prompting that is tailored to spoken output:
// System prompt tuned for spoken (phone) output: short conversational replies,
// no markdown, and '•' markers at natural pauses — presumably consumed by a
// downstream TTS pipeline as chunking/flush points; verify against the TTS
// integration.
const systemPrompt = `You are a helpful phone assistant.
Guidelines:
- Keep responses brief (1-2 sentences when possible)
- Use conversational language, not formal writing
- Don't use markdown, lists, or formatting
- Add a '•' symbol every 5-10 words at natural pauses
- Ask only one question at a time
- If unclear, ask for clarification
Personality: Friendly and helpful, but efficient.`;
Maintain context across turns:
/**
 * Rolling conversation history for multi-turn chat, pinned to a system prompt.
 */
class ConversationContext {
  /**
   * @param {string} systemPrompt - Always kept as the first message.
   * @param {number} [maxMessages=20] - Window size returned by getMessages()
   *   (was a hard-coded constant inside getMessages).
   */
  constructor(systemPrompt, maxMessages = 20) {
    this.messages = [
      { role: 'system', content: systemPrompt },
    ];
    this.maxMessages = maxMessages;
  }

  /** Append a user turn. */
  addUserMessage(text) {
    this.messages.push({ role: 'user', content: text });
  }

  /** Append an assistant turn. */
  addAssistantMessage(text) {
    this.messages.push({ role: 'assistant', content: text });
  }

  /**
   * Messages to send to the model: the system prompt plus the most recent
   * turns, capped at maxMessages to avoid token limits.
   * @returns {Array<{role: string, content: string}>}
   */
  getMessages() {
    if (this.messages.length > this.maxMessages) {
      return [
        this.messages[0], // keep system prompt
        ...this.messages.slice(1 - this.maxMessages),
      ];
    }
    return this.messages;
  }
}
Let the LLM trigger actions:
// Tool definitions in the OpenAI function-calling schema (JSON Schema
// parameters). The model decides when to call these; the app executes them.
const tools = [
{
type: 'function',
function: {
// Hand the caller off to a human in one of the listed departments.
name: 'transfer_call',
description: 'Transfer the call to a human agent',
parameters: {
type: 'object',
properties: {
department: {
type: 'string',
enum: ['sales', 'support', 'billing']
}
},
required: ['department']
}
}
},
{
type: 'function',
function: {
// Look up an order by its id.
name: 'check_order_status',
description: 'Check the status of an order',
parameters: {
type: 'object',
properties: {
order_id: { type: 'string' }
},
required: ['order_id']
}
}
}
];
// Ask the model for a reply, letting it decide whether to invoke a tool.
// NOTE(review): this top-level `await` needs an async context in CommonJS —
// wrap in an async function.
const response = await openai.chat.completions.create({
model: 'gpt-4',
messages: messages,
tools: tools,
tool_choice: 'auto'
});
// Handle function calls
if (response.choices[0].message.tool_calls) {
for (const call of response.choices[0].message.tool_calls) {
// NOTE(review): call.function.arguments is a JSON-encoded string, not an
// object — presumably executeFunction parses it; verify its implementation.
const result = await executeFunction(call.function.name, call.function.arguments);
// Add result to conversation and continue
// NOTE(review): `result` is not yet appended as a `tool` role message here —
// the follow-up request with tool results still needs to be implemented.
}
}
Don't send incomplete sentences to the LLM:
/**
 * Buffers streaming ASR transcripts so only complete utterances reach the LLM.
 *
 * Fixes two defects in the previous version: (1) non-final (interim) text was
 * never stored, so the timeout branch could never flush anything, and (2) the
 * timeout callback's flushed text was returned into setTimeout and discarded.
 * Timed-out text is now delivered through an optional onFlush callback.
 */
class TranscriptionBuffer {
  /**
   * @param {Object} [options]
   * @param {(text: string) => void} [options.onFlush] - Receives text flushed
   *   by the silence timeout (final results are still returned from add()).
   * @param {number} [options.timeoutMs=1000] - Silence window before flushing.
   */
  constructor({ onFlush = null, timeoutMs = 1000 } = {}) {
    this.buffer = '';
    this.interim = ''; // latest non-final hypothesis
    this.timeout = null;
    this.onFlush = onFlush;
    this.timeoutMs = timeoutMs;
  }

  /**
   * Add a transcription result.
   * @param {string} text - Transcript text.
   * @param {boolean} isFinal - Whether the recognizer marked it final.
   * @returns {string|null} Complete utterance on final results, else null.
   */
  add(text, isFinal) {
    clearTimeout(this.timeout);
    this.timeout = null;
    if (isFinal) {
      // The final result supersedes any interim hypothesis for this segment.
      this.interim = '';
      this.buffer += text + ' ';
      return this.flush();
    }
    // Interim hypotheses replace (not append to) the previous one — ASR
    // engines typically re-emit the whole partial segment each time.
    this.interim = text;
    // If no further input arrives, flush whatever we have via the callback.
    this.timeout = setTimeout(() => {
      this.timeout = null;
      const pending = (this.buffer + this.interim).trim();
      this.buffer = '';
      this.interim = '';
      if (pending) {
        this.onFlush?.(pending);
      }
    }, this.timeoutMs);
    return null;
  }

  /**
   * Drain the finalized buffer.
   * @returns {string|null} Trimmed text, or null if the buffer is empty.
   */
  flush() {
    const text = this.buffer.trim();
    this.buffer = '';
    return text || null;
  }
}
async function safeCompletion(messages, retries = 3) {
for (let i = 0; i < retries; i++) {
try {
return await streamCompletion(messages);
} catch (error) {
if (error.status === 429) {
// Rate limited, wait and retry
await sleep(Math.pow(2, i) * 1000);
continue;
}
throw error;
}
}
throw new Error('Max retries exceeded');
}